lost linestart
[mediawiki.git] / includes / Parser.php
blob61d893cfdf8c94eeb4a069e093f1c769b8c51ab8
1 <?php
3 # Globals used:
4 # major: $wgUser, $wgTitle,
5 # minor: $wgUseTeX
7 class Parser
9 var $mOutput, $mAutonumber, $mLastSection, $mDTopen;
# Constructor (PHP 4 style: same name as the class).
# A new parser always begins from a freshly reset state.
function Parser()
{
    $this->clearState();
}
# Reset every piece of per-parse state so that one Parser instance can be
# reused for several texts without leaking results between them.
function clearState()
{
    $this->mOutput = new ParserOutput;
    $this->mAutonumber = 0;      # counter for auto-numbered external links
    $this->mLastSection = "";    # block section currently open ("", "p", "pre")
    $this->mDTopen = false;      # true while a <dt> is open in a definition list

    # These members are incremented / appended to by replaceVariables() and
    # replaceInternalLinks() but were never initialised, so the first "++"
    # or "+=" hit an undefined property and values survived across parses.
    $this->mContainsOldMagic = 0;
    $this->mContainsNewMagic = 0;
    $this->mCategoryLinks = array();
}
# First pass--just handle <nowiki>, <math> and <pre> sections, pass the
# rest off to doWikiPass2() which does all the real work.
#
# Each protected section is cut out of the text and replaced by a unique
# placeholder token ("<unique><n>s"); after doWikiPass2() has run, the
# placeholders are swapped back for the protected content.
#
#   $text        wikitext to convert
#   $linestart   handed through to doWikiPass2()
#   $clearState  when true, clearState() is called before parsing
#
# Returns a ParserOutput (the object kept in $this->mOutput)
function parse( $text, $linestart = true, $clearState = true )
{
    global $wgUseTeX, $wgUser, $wgEnableParserCache;
    $fname = "Parser::parse";
    wfProfileIn( $fname );
    $unique  = "3iyZiyA7iMwg5rhxP0Dcc9oTnj8qD1jm1Sfv4";
    $unique2 = "4LIQ9nXtiYFPCSfitVwDw7EYwQlL4GeeQ7qSO";
    $unique3 = "fPaA8gDfdLBqzj68Yjg9Hil3qEF8JGO0uszIp";
    $nwlist = array();
    $nwsecs = 0;
    $mathlist = array();
    $mathsecs = 0;
    $prelist = array ();
    $presecs = 0;
    $stripped = "";
    $stripped2 = "";
    $stripped3 = "";

    if ( $clearState ) {
        $this->clearState();
    }

    # Replace any instances of the placeholders
    $text = str_replace( $unique, wfHtmlEscapeFirst( $unique ), $text );
    $text = str_replace( $unique2, wfHtmlEscapeFirst( $unique2 ), $text );
    $text = str_replace( $unique3, wfHtmlEscapeFirst( $unique3 ), $text );

    # NOTE(review): $action and $article are not defined anywhere in this
    # method, so this condition has always evaluated to false and the
    # parser cache is effectively disabled here. The isset() guards keep
    # that behaviour while avoiding undefined-variable notices; wire in
    # real values if the cache is meant to work from Parser.
    $use_parser_cache =
        $wgEnableParserCache && isset( $action ) && $action == "view" &&
        is_object( $wgUser ) &&
        intval($wgUser->getOption( "stubthreshold" )) == 0 &&
        isset( $article ) && is_object($article) && $article->getID() > 0;

    if( $use_parser_cache ){
        if( $this->fillFromParserCache() ){
            wfProfileOut( $fname );
            # Callers expect a ParserOutput, not null.
            # presumably fillFromParserCache() populated mOutput -- confirm
            return $this->mOutput;
        }
    }

    # Strip <nowiki> sections
    while ( "" != $text ) {
        $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
        $stripped .= $p[0];
        if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { $text = ""; }
        else {
            $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
            ++$nwsecs;
            $nwlist[$nwsecs] = wfEscapeHTMLTagsOnly($q[0]);
            $stripped .= $unique . $nwsecs . "s";
            # An unclosed tag leaves no remainder to continue with
            $text = isset( $q[1] ) ? $q[1] : "";
        }
    }

    # Strip and render <math> sections (only when TeX support is on)
    if( $wgUseTeX ) {
        while ( "" != $stripped ) {
            $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
            $stripped2 .= $p[0];
            if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { $stripped = ""; }
            else {
                $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
                ++$mathsecs;
                $mathlist[$mathsecs] = renderMath($q[0]);
                $stripped2 .= $unique2 . $mathsecs . "s";
                $stripped = isset( $q[1] ) ? $q[1] : "";
            }
        }
    } else {
        $stripped2 = $stripped;
    }

    # Strip <pre> sections
    while ( "" != $stripped2 ) {
        $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
        $stripped3 .= $p[0];
        if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { $stripped2 = ""; }
        else {
            $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
            ++$presecs;
            $prelist[$presecs] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
            $stripped3 .= $unique3 . $presecs . "s";
            $stripped2 = isset( $q[1] ) ? $q[1] : "";
        }
    }

    $text = $this->doWikiPass2( $stripped3, $linestart );

    # Put the protected sections back, in reverse order of stripping.
    # The placeholders are plain alphanumeric tokens, so a literal
    # str_replace() is enough and the protected content needs no escaping
    # of "\" or "$" as it did when used as a preg_replace() replacement.
    for ( $i = $presecs; $i >= 1; --$i ) {
        $text = str_replace( $unique3 . $i . "s", $prelist[$i], $text );
    }

    for ( $i = $mathsecs; $i >= 1; --$i ) {
        $text = str_replace( $unique2 . $i . "s", $mathlist[$i], $text );
    }

    for ( $i = $nwsecs; $i >= 1; --$i ) {
        $text = str_replace( $unique . $i . "s", $nwlist[$i], $text );
    }

    if($use_parser_cache ){
        $this->saveParserCache( $text );
    }

    $this->mOutput->setText( $text );
    wfProfileOut( $fname );
    return $this->mOutput;
}
# Builds the HTML listing appended to a category page: a "subcategories"
# section and an article section, each a comma-separated list of links to
# the pages returned by the query below.
#
# Returns "" when $wgUseCategoryMagic is off or when the current title's
# text does not start with the (ucfirst'ed) "category" message followed
# by ":".
function categoryMagic ()
{
    global $wgTitle , $wgUseCategoryMagic, $wgLang ;
    # Always return a string; the caller appends the result to the page text
    if ( !isset ( $wgUseCategoryMagic ) || !$wgUseCategoryMagic ) return "" ;
    $id = $wgTitle->getArticleID() ;
    $cat = ucfirst ( wfMsg ( "category" ) ) ;
    $ti = $wgTitle->getText() ;
    $ti = explode ( ":" , $ti , 2 ) ;
    if ( $cat != $ti[0] ) return "" ;
    # "clear" is the BR attribute that ends floating; "break" does not exist
    $r = "<br clear=all>\n" ;

    $articles = array() ;
    $children = array() ;

    global $wgUser ;
    $sk = $wgUser->getSkin() ;

    # NOTE(review): $doesexist is hard-wired to false, so only the
    # brokenlinks query is ever used -- confirm this is intended.
    $doesexist = false ;
    if ( $doesexist ) {
        $sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
    } else {
        $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
    }

    $res = wfQuery ( $sql, DB_READ ) ;
    while ( $x = wfFetchObject ( $res ) )
    {
        # $t = new Title ;
        # $t->newFromDBkey ( $x->l_from ) ;
        # $t = $t->getText() ;
        if ( $doesexist ) {
            $t = $x->l_from ;
        } else {
            $t = $wgLang->getNsText ( $x->cur_namespace ) ;
            if ( $t != "" ) $t .= ":" ;
            $t .= $x->cur_title ;
        }

        # Pages whose own title starts with the category prefix are
        # listed as subcategories, everything else as articles
        $y = explode ( ":" , $t , 2 ) ;
        if ( count ( $y ) == 2 && $y[0] == $cat ) {
            array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
        } else {
            array_push ( $articles , $sk->makeLink ( $t ) ) ;
        }
    }
    wfFreeResult ( $res ) ;

    # Children
    if ( count ( $children ) > 0 )
    {
        asort ( $children ) ;
        $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
        $r .= implode ( ", " , $children ) ;
    }

    # Articles
    if ( count ( $articles ) > 0 )
    {
        asort ( $articles ) ;
        # A title that is only the prefix, with no ":", has no second part
        $catname = isset ( $ti[1] ) ? $ti[1] : "" ;
        $h = wfMsg( "category_header", $catname );
        $r .= "<h2>{$h}</h2>\n" ;
        $r .= implode ( ", " , $articles ) ;
    }

    return $r ;
}
# Returns the whitelist of attribute names that may survive on HTML tags
# embedded in wikitext. Names are lower case; fixTagAttributes() lower-cases
# the candidate before looking it up here.
function getHTMLattrs ()
{
    $htmlattrs = array( # Allowed attributes--no scripting, etc.
        "title", "align", "lang", "dir", "width", "height",
        "bgcolor", "clear", /* BR */ "noshade", /* HR */
        "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
        /* FONT */ "type", "start", "value", "compact",
        /* For various lists, mostly deprecated but safe */
        # "width" is already listed above and is not repeated here
        "summary", "border", "frame", "rules",
        "cellspacing", "cellpadding", "valign", "char",
        "charoff", "colgroup", "col", "span", "abbr", "axis",
        "headers", "scope", "rowspan", "colspan", /* Tables */
        "id", "class", "name", "style" /* For CSS */
    );
    return $htmlattrs ;
}
# Filters the attribute part of an HTML tag.
#
#   $t  raw attribute text (everything between the tag name and ">")
#
# Every name[=value] pair whose name is not in getHTMLattrs() is removed.
# If what remains contains a style attribute with "expression", a URL
# scheme or "url(", ALL attributes are dropped. Returns the cleaned,
# trimmed attribute string.
function fixTagAttributes ( $t )
{
    if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

    # Strip non-approved attributes from the tag.
    # This used to be preg_replace() with the /e modifier, which evaluates
    # the replacement as PHP code with user-supplied text interpolated
    # into it (and mangles quotes through addslashes). A callback does the
    # same filtering without evaluating anything.
    $t = preg_replace_callback(
        "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
        array( $this, "fixTagAttributesCallback" ),
        $t);
    # Strip javascript "expression" from stylesheets. Brute force approach:
    # If anything offensive is found, all attributes of the HTML tag are dropped

    if( preg_match(
        "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
        wfMungeToUtf8( $t ) ) )
    {
        $t="";
    }

    return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes().
#   $m[1] attribute name, $m[3] attribute value (absent for bare names)
# Returns name or name=value for whitelisted names, "" otherwise.
/* private */ function fixTagAttributesCallback ( $m )
{
    if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() ) ) return "" ;
    $value = isset ( $m[3] ) ? $m[3] : "" ;
    return ( $value !== "" ) ? "{$m[1]}={$value}" : $m[1] ;
}
# Converts wiki table markup into HTML table tags, line by line:
#   {|  opens a table        |}  closes it
#   |-  starts a new row     |+  caption
#   |   data cell(s)         !   header cell(s)    (|| or !! separate cells)
# Lines outside any table are left untouched. Tables may nest; one entry
# per open table is kept on each of the four stacks below.
# Returns the converted text.
function doTableStuff ( $t )
{
    $t = explode ( "\n" , $t ) ;
    $td = array () ; # Is currently a td tag open?
    $ltd = array () ; # Was it TD or TH?
    $tr = array () ; # Is currently a tr tag open?
    $ltr = array () ; # tr attributes
    foreach ( $t AS $k => $x )
    {
        $x = rtrim ( $x ) ;
        $fc = substr ( $x , 0 , 1 ) ;
        if ( "{|" == substr ( $x , 0 , 2 ) )
        {
            # The marker is two characters long. The old offset of 3
            # silently ate the first attribute character when no space
            # followed "{|"; fixTagAttributes() trims, so a leading
            # space is harmless.
            $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
            array_push ( $td , false ) ;
            array_push ( $ltd , "" ) ;
            array_push ( $tr , false ) ;
            array_push ( $ltr , "" ) ;
        }
        else if ( count ( $td ) == 0 ) { } # Don't do any of the following
        else if ( "|}" == substr ( $x , 0 , 2 ) )
        {
            $z = "</table>\n" ;
            $l = array_pop ( $ltd ) ;
            if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
            if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
            array_pop ( $ltr ) ;
            $t[$k] = $z ;
        }
        /* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
        {
            $z = trim ( substr ( $x , 2 ) ) ;
            $t[$k] = "<caption>{$z}</caption>\n" ;
        } */
        else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
        {
            $x = substr ( $x , 1 ) ;
            while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
            $z = "" ;
            $l = array_pop ( $ltd ) ;
            if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
            if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
            array_pop ( $ltr ) ;
            $t[$k] = $z ;
            # The <tr> itself is emitted lazily by the first cell of the
            # row; only its attributes are remembered here
            array_push ( $tr , false ) ;
            array_push ( $td , false ) ;
            array_push ( $ltd , "" ) ;
            array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
        }
        else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
        {
            if ( "|+" == substr ( $x , 0 , 2 ) )
            {
                $fc = "+" ;
                $x = substr ( $x , 1 ) ;
            }
            $after = substr ( $x , 1 ) ;
            if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
            $after = explode ( "||" , $after ) ;
            $t[$k] = "" ;
            foreach ( $after AS $theline )
            {
                $z = "" ;
                if ( $fc != "+" )
                {
                    $tra = array_pop ( $ltr ) ;
                    if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                    array_push ( $tr , true ) ;
                    array_push ( $ltr , "" ) ;
                }

                $l = array_pop ( $ltd ) ;
                if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                if ( $fc == "|" ) $l = "TD" ;
                else if ( $fc == "!" ) $l = "TH" ;
                else if ( $fc == "+" ) $l = "CAPTION" ;
                else $l = "" ;
                array_push ( $ltd , $l ) ;
                # "attributes|content" or just "content"
                $y = explode ( "|" , $theline , 2 ) ;
                if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                $t[$k] .= $y ;
                array_push ( $td , true ) ;
            }
        }
    }

    # Closing open td, tr && table
    while ( count ( $td ) > 0 )
    {
        # Close the cell with the tag that actually opened it (TD, TH or
        # CAPTION) rather than always assuming a td
        $l = array_pop ( $ltd ) ;
        if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
        if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
        $t[] = "</table>" ;
    }

    $t = implode ( "\n" , $t ) ;
    # $t = $this->removeHTMLtags( $t );
    return $t ;
}
# Well, OK, it's actually about 14 passes. But since all the
# hard lifting is done inside PHP's regex code, it probably
# wouldn't speed things up much to add a real parser.
#
# Runs the individual conversion steps over $text in a fixed order and
# returns the resulting HTML. $linestart is handed to doBlockLevels().
# The order matters: e.g. external links are replaced before internal
# ones (see replaceExternalLinks()).
function doWikiPass2( $text, $linestart )
{
    global $wgUser, $wgLang, $wgUseDynamicDates;
    # Profiling label now names this class, matching "Parser::parse"
    $fname = "Parser::doWikiPass2";
    wfProfileIn( $fname );

    $text = $this->removeHTMLtags( $text );
    $text = $this->replaceVariables( $text );

    # Four or more dashes at the start of a line become a rule
    $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
    $text = str_replace ( "<HR>", "<hr>", $text );

    $text = $this->doAllQuotes( $text );
    $text = $this->doHeadings( $text );
    $text = $this->doBlockLevels( $text, $linestart );

    if($wgUseDynamicDates) {
        global $wgDateFormatter;
        $text = $wgDateFormatter->reformat( $wgUser->getOption("date"), $text );
    }

    $text = $this->replaceExternalLinks( $text );
    $text = $this->replaceInternalLinks ( $text );
    $text = $this->doTableStuff ( $text ) ;

    $text = $this->magicISBN( $text );
    $text = $this->magicRFC( $text );
    $text = $this->formatHeadings( $text );

    $sk = $wgUser->getSkin();
    $text = $sk->transformContent( $text );
    $text .= $this->categoryMagic () ;

    wfProfileOut( $fname );
    return $text;
}
# Applies doQuotes() (''emphasis'' / '''strong''' markup) to every line of
# $text separately and returns the re-joined text.
#
# The text is split on "\n" (an optional preceding "\r" is swallowed), as
# doBlockLevels() and doTableStuff() do. The earlier split on "\r\n" left
# "\n"-delimited text as one single line, so quote markup ran across line
# boundaries, and a stray "\r\n" was appended to the result.
/* private */ function doAllQuotes( $text )
{
    $lines = preg_split( "/\r?\n/", $text );
    foreach ( $lines as $i => $line ) {
        $lines[$i] = $this->doQuotes ( "", $line, "" );
    }
    return implode( "\n", $lines );
}
# Recursively converts quote markup in one piece of text:
#   ''...''   -> <em>,   '''...'''   -> <strong>
#
#   $pre   already-processed text that still belongs inside the element
#          that is currently open
#   $text  text still to be scanned
#   $mode  which elements are open: "", "em", "strong", "emstrong"
#          (em opened first), "strongem" (strong opened first) or "both"
#          (both opened at the same spot, order not yet known)
#
# Returns the converted text; elements still open at the end are closed.
/* private */ function doQuotes( $pre, $text, $mode )
{
    if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
        # No further quote run: wrap what is left according to the mode
        $strongText = ($text == "") ? "" : "<strong>{$text}</strong>";
        $emText = ($text == "") ? "" : "<em>{$text}</em>";
        switch ( $mode ) {
            case "":
                return $pre . $text;
            case "em":
                return $pre . $emText;
            case "strong":
                return $pre . $strongText;
            case "strongem":
                return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$emText}</strong>";
            default:
                return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$strongText}</em>";
        }
    }

    $before = $m[1];   # text in front of the quote run
    $after = $m[2];    # text behind the first two quotes
    $beforeStrong = ($before == "") ? "" : "<strong>{$before}</strong>";
    $beforeEm = ($before == "") ? "" : "<em>{$before}</em>";

    if ( substr ($after, 0, 1) == "'" ) {
        # Three quotes: toggles strong
        $after = substr ($after, 1);
        switch ( $mode ) {
            case "em":
                return $this->doQuotes ( $before, $after, ($before == "") ? "both" : "emstrong" );
            case "strong":
                return $beforeStrong . $this->doQuotes ( "", $after, "" );
            case "emstrong":
            case "both":
                return $this->doQuotes ( "", $pre.$beforeStrong.$after, "em" );
            case "strongem":
                return "<strong>{$pre}{$beforeEm}</strong>" . $this->doQuotes ( "", $after, "em" );
            default:
                return $before . $this->doQuotes ( "", $after, "strong" );
        }
    }

    # Two quotes: toggles em
    switch ( $mode ) {
        case "strong":
            return $this->doQuotes ( $before, $after, ($before == "") ? "both" : "strongem" );
        case "em":
            return $beforeEm . $this->doQuotes ( "", $after, "" );
        case "emstrong":
            return "<em>{$pre}{$beforeStrong}</em>" . $this->doQuotes ( "", $after, "strong" );
        case "strongem":
        case "both":
            return $this->doQuotes ( "", $pre.$beforeEm.$after, "strong" );
        default:
            return $before . $this->doQuotes ( "", $after, "em" );
    }
}
# Turns "== Heading ==" style lines into <h1>..<h6> elements.
# Levels are handled from six equals signs down to one, so that a longer
# marker is consumed before a shorter one could match part of it.
# Returns the converted text.
/* private */ function doHeadings( $text )
{
    $level = 6;
    while ( $level >= 1 ) {
        $marker = str_repeat( "=", $level );
        $pattern = "/^{$marker}([^=]+){$marker}(\\s|$)/m";
        $replacement = "<h{$level}>\\1</h{$level}>\\2";
        $text = preg_replace( $pattern, $replacement, $text );
        --$level;
    }
    return $text;
}
# Note: we have to do external links before the internal ones,
# and otherwise take great care in the order of things here, so
# that we don't end up interpreting some URLs twice.
#
# Runs subReplaceExternalLinks() once per supported protocol, in the
# order listed. The flag says whether bracketed links without a label
# are auto-numbered ("[1]", "[2]", ...) for that protocol.
/* private */ function replaceExternalLinks( $text )
{
    # Profiling label now names this class, matching "Parser::parse"
    $fname = "Parser::replaceExternalLinks";
    wfProfileIn( $fname );
    $protocols = array(
        "http"   => true,
        "https"  => true,
        "ftp"    => false,
        "irc"    => false,
        "gopher" => false,
        "news"   => false,
        "mailto" => false
    );
    foreach ( $protocols as $protocol => $autonumber ) {
        $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
    }
    wfProfileOut( $fname );
    return $text;
}
# Replaces the external links of ONE protocol in $s.
#
#   $s           text to process
#   $protocol    scheme without the colon, e.g. "http"
#   $autonumber  true: "[url]" is shown as a running number and (if
#                $wgAllowExternalImages is set) bare image URLs become
#                images; false: "[url]" is shown as the escaped URL
#
# Two passes: first bare URLs not preceded by "[", then bracketed
# "[url]" / "[url label]" links. Returns the converted text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
    global $wgUser, $printable;
    global $wgAllowExternalImages;

    # Stand-in for the protocol while bare URLs are rewritten, so that the
    # generated markup is not matched again by the same pattern
    $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
    $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

    # this is the list of separators that should be ignored if they
    # are the last character of an URL but that should be included
    # if they occur within the URL, e.g. "go to www.foo.com, where .."
    # in this case, the last comma should not become part of the URL,
    # but in "www.foo.com/123,2342,32.htm" it should.
    $sep = ",;\.:";
    $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
    $images = "gif|png|jpg|jpeg";

    # PLEASE NOTE: The curly braces { } are not part of the regex,
    # they are interpreted as part of the string (used to tell PHP
    # that the content of the string should be inserted there).
    $bareImageRe = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
        "((?i){$images})([^{$uc}]|$)/";

    $bareUrlRe = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
    $sk = $wgUser->getSkin();

    if ( $autonumber and $wgAllowExternalImages) { # Use img tags only for HTTP urls
        $imageHtml = $sk->makeImage( "{$unique}:\\3" . "/\\4.\\5", "\\4.\\5" );
        $s = preg_replace( $bareImageRe, "\\1" . $imageHtml . "\\6", $s );
    }
    $anchorHtml = "\\1" . "<a href=\"{$unique}:\\3\"" .
        $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
        "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
        "</a>\\5";
    $s = preg_replace( $bareUrlRe, $anchorHtml, $s );
    $s = str_replace( $unique, $protocol, $s );

    # Bracketed links: split at every "[protocol:"; the leading blank
    # guarantees a first chunk even when the text starts with a link
    $chunks = explode( "[{$protocol}:", " " . $s );
    $s = array_shift( $chunks );
    $s = substr( $s, 1 );

    $plainRe = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
    $labelledRe = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

    foreach ( $chunks as $line ) {
        if ( preg_match( $plainRe, $line, $m ) ) {
            # [url]
            $link = "{$protocol}:{$m[1]}";
            $trail = $m[2];
            if ( $autonumber ) {
                $text = "[" . ++$this->mAutonumber . "]";
            } else {
                $text = wfEscapeHTML( $link );
            }
        } else if ( preg_match( $labelledRe, $line, $m ) ) {
            # [url label]
            $link = "{$protocol}:{$m[1]}";
            $text = $m[2];
            $trail = $m[3];
        } else {
            # Not a link after all; put the text back unchanged
            $s .= "[{$protocol}:" . $line;
            continue;
        }
        # In printable mode the target is spelled out after the link
        $paren = "";
        if ( $printable == "yes") {
            $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
        }
        $la = $sk->getExternalLinkAttributes( $link, $text );
        $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
    }
    return $s;
}
# Replaces [[internal links]] in $s with the HTML produced by the skin.
#
# The text is split at every "[[" and each chunk is matched against
# "target|label]]trail". Depending on the target, a chunk becomes a
# language link (collected in mOutput, nothing shown), an image, a
# self-link (shown bold), a category link (collected, nothing shown), a
# media or special-page link, or an ordinary page link. Chunks that do not
# form a valid link are put back unchanged. Returns the converted text.
/* private */ function replaceInternalLinks( $s )
{
    global $wgTitle, $wgUser, $wgLang;
    global $wgLinkCache, $wgInterwikiMagic, $wgUseCategoryMagic;
    global $wgNamespacesWithSubpages, $wgLanguageCode;
    global $wgUseLinkPrefixCombination;
    # Profiling label now names this class, matching "Parser::parse"
    wfProfileIn( $fname = "Parser::replaceInternalLinks" );

    wfProfileIn( "$fname-setup" );
    $tc = Title::legalChars() . "#";
    $sk = $wgUser->getSkin();

    # The leading blank guarantees a first chunk even when the text
    # starts with a link
    $a = explode( "[[", " " . $s );
    $s = array_shift( $a );
    $s = substr( $s, 1 );

    # Match a link having the form [[namespace:link|alternate]]trail
    $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD";
    # Match the end of a line for a word that's not followed by whitespace,
    # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
    #$e2 = "/^(.*)\\b(\\w+)\$/suD";
    #$e2 = "/^(.*\\s)(\\S+)\$/suD";
    $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';

    # Special and Media are pseudo-namespaces; no pages actually exist in them
    $image = Namespace::getImage();
    $special = Namespace::getSpecial();
    $media = Namespace::getMedia();
    $category = wfMsg ( "category" ) ;
    $nottalk = !Namespace::isTalk( $wgTitle->getNamespace() );

    # NOTE(review): the prefix of the first link is gated on
    # $wgLang->linkPrefixExtension() while the loop below gates on
    # $wgUseLinkPrefixCombination -- confirm which switch is meant.
    if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
        $new_prefix = $m[2];
        $s = $m[1];
    } else {
        $new_prefix="";
    }

    wfProfileOut( "$fname-setup" );

    foreach ( $a as $line ) {
        # The word prefix cut off the previous chunk belongs to this link
        $prefix = $new_prefix;
        if ( $wgUseLinkPrefixCombination && preg_match( $e2, $line, $m ) ) {
            $new_prefix = $m[2];
            $line = $m[1];
        } else {
            $new_prefix = "";
        }
        if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
            $text = $m[2];
            $trail = $m[3];
        } else { # Invalid form; output directly
            $s .= $prefix . "[[" . $line ;
            continue;
        }

        /* Valid link forms:
        Foobar -- normal
        :Foobar -- override special treatment of prefix (images, language links)
        /Foobar -- convert to CurrentPage/Foobar
        /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
        */
        $c = substr($m[1],0,1);
        $noforce = ($c != ":");
        if( $c == "/" ) { # subpage
            if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
                $m[1]=substr($m[1],1,strlen($m[1])-2);
                $noslash=$m[1];
            } else {
                $noslash=substr($m[1],1);
            }
            if($wgNamespacesWithSubpages[$wgTitle->getNamespace()]) { # subpages allowed here
                $link = $wgTitle->getPrefixedText(). "/" . trim($noslash);
                if( "" == $text ) {
                    $text= $m[1];
                } # this might be changed for ugliness reasons
            } else {
                $link = $noslash; # no subpage allowed, use standard link
            }
        } elseif( $noforce ) { # no subpage
            $link = $m[1];
        } else {
            $link = substr( $m[1], 1 );
        }
        if( "" == $text )
            $text = $link;

        $nt = Title::newFromText( $link );
        if( !$nt ) {
            $s .= $prefix . "[[" . $line;
            continue;
        }
        $ns = $nt->getNamespace();
        $iw = $nt->getInterWiki();
        if( $noforce ) {
            if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
                array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
                $s .= $prefix . $trail;
                continue;
            }
            if( $ns == $image ) {
                $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
                $wgLinkCache->addImageLinkObj( $nt );
                continue;
            }
        }
        # Link to the current page without a section anchor: show it bold.
        # strpos() returns 0 for a "#" at the very start, which "== FALSE"
        # would mistake for "not found"; only "=== FALSE" means no anchor.
        if( ( $nt->getPrefixedText() == $wgTitle->getPrefixedText() ) &&
            ( strpos( $link, "#" ) === FALSE ) ) {
            $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
            continue;
        }
        # NOTE(review): $category is the "category" message text while $ns
        # comes from getNamespace(); if that is a number this loose
        # comparison is unlikely to do what was intended -- please verify.
        # NOTE(review): mCategoryLinks is collected on the parser itself,
        # not on mOutput like the language links above -- confirm.
        if ( $ns == $category && $wgUseCategoryMagic ) {
            $t = explode ( ":" , $nt->getText() ) ;
            array_shift ( $t ) ;
            $t = implode ( ":" , $t ) ;
            $t = $wgLang->ucFirst ( $t ) ;
            # $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
            $nnt = Title::newFromText ( $category.":".$t ) ;
            $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
            $this->mCategoryLinks[] = $t ;
            $s .= $prefix . $trail ;
            continue ;
        }
        if( $ns == $media ) {
            $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
            $wgLinkCache->addImageLinkObj( $nt );
            continue;
        } elseif( $ns == $special ) {
            $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
            continue;
        }
        $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
    }
    wfProfileOut( $fname );
    return $s;
}
# Some functions here used by doBlockLevels()

# Ends the block section recorded in mLastSection and forgets it.
# Returns the closing tag followed by a newline; for "p" or no section
# at all only the newline is returned (paragraphs are not closed
# explicitly).
/* private */ function closeParagraph()
{
    $open = $this->mLastSection;
    $this->mLastSection = "";

    $needsNoTag = ( 0 == strcmp( "p", $open ) ) || ( 0 == strcmp( "", $open ) );
    $closing = $needsNoTag ? "" : "</" . $open . ">";
    return $closing."\n";
}
# getCommon() returns the length of the longest common substring
# of both arguments, starting at the beginning of both.
# (i.e. the length of their common prefix; 0 if either string is empty)
/* private */ function getCommon( $st1, $st2 )
{
    $fl = strlen( $st1 );
    $shorter = strlen( $st2 );
    if ( $fl < $shorter ) { $shorter = $fl; }

    for ( $i = 0; $i < $shorter; ++$i ) {
        # Square brackets: the $str{$i} offset syntax no longer exists
        # in current PHP, while $str[$i] works in every version
        if ( $st1[$i] != $st2[$i] ) { break; }
    }
    return $i;
}
# openList(), nextItem() and closeList() open, continue, and close the
# list element appropriate to the prefix character passed into them.

# Closes the current paragraph section and opens a new list plus its
# first item for $char ("*", "#", ":" or ";"). Opening a ";" list marks a
# <dt> as open. Any other character yields only an error comment.
/* private */ function openList( $char )
{
    $html = $this->closeParagraph();

    switch ( $char ) {
        case "*":
            $html .= "<ul><li>";
            break;
        case "#":
            $html .= "<ol><li>";
            break;
        case ":":
            $html .= "<dl><dd>";
            break;
        case ";":
            $html .= "<dl><dt>";
            $this->mDTopen = true;
            break;
        default:
            # replaces (not appends to) what closeParagraph() returned
            $html = "<!-- ERR 1 -->";
    }

    return $html;
}
# Returns the markup that ends the current list item and starts the next
# one in a list of type $char. For definition lists (":" / ";") the
# element being closed depends on whether a <dt> is currently open, and
# mDTopen is updated to reflect the element just opened.
/* private */ function nextItem( $char )
{
    if ( "*" == $char || "#" == $char ) {
        return "</li><li>";
    }
    if ( ":" != $char && ";" != $char ) {
        return "<!-- ERR 2 -->";
    }

    $closing = $this->mDTopen ? "</dt>" : "</dd>";
    $opensTerm = ( ";" == $char );
    $this->mDTopen = $opensTerm;
    return $closing . ( $opensTerm ? "<dt>" : "<dd>" );
}
# Returns the markup that closes the last item and the list itself for a
# list of type $char ("*", "#" or ":"), followed by a newline. For ":" an
# open <dt> is closed instead of a <dd> and mDTopen is cleared. Any other
# character (including ";") yields only an error comment, without newline.
/* private */function closeList( $char )
{
    switch ( $char ) {
        case "*":
            $closing = "</li></ul>";
            break;
        case "#":
            $closing = "</li></ol>";
            break;
        case ":":
            if ( $this->mDTopen ) {
                $this->mDTopen = false;
                $closing = "</dt></dl>";
            } else {
                $closing = "</dd></dl>";
            }
            break;
        default:
            return "<!-- ERR 3 -->";
    }
    return $closing."\n";
}
# Handles block-level structure, line by line: lists built from lines
# starting with * # : ; and the p / pre sections for everything else.
#
#   $text       text to process
#   $linestart  false: the first line is the continuation of something
#               else and is copied through without block handling
#
# Returns the converted text with all lists and sections closed.
/* private */ function doBlockLevels( $text, $linestart )
{
    # Profiling label now names this class, matching "Parser::parse"
    $fname = "Parser::doBlockLevels";
    wfProfileIn( $fname );
    # Parsing through the text line by line. The main thing
    # happening here is handling of block-level elements p, pre,
    # and making lists from lines starting with * # : etc.

    $a = explode( "\n", $text );
    $text = $lastPref = "";
    $this->mDTopen = $inBlockElem = false;
    # Defined up front: both are read after the loop, which may not run
    # at all when the only line was consumed by the !$linestart case
    $npl = 0;
    $pref2 = "";

    if ( ! $linestart ) { $text .= array_shift( $a ); }
    foreach ( $a as $t ) {
        if ( "" != $text ) { $text .= "\n"; }

        $oLine = $t;
        $opl = strlen( $lastPref );            # depth of the previous line
        $npl = strspn( $t, "*#:;" );           # depth of this line
        $pref = substr( $t, 0, $npl );
        $pref2 = str_replace( ";", ":", $pref ); # ";" and ":" share one list
        $t = substr( $t, $npl );

        if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
            # Same list as before: just start the next item
            $text .= $this->nextItem( substr( $pref, -1 ) );

            if ( ";" == substr( $pref, -1 ) ) {
                # "; term : definition" on a single line
                $cpos = strpos( $t, ":" );
                if ( ! ( false === $cpos ) ) {
                    $term = substr( $t, 0, $cpos );
                    $text .= $term . $this->nextItem( ":" );
                    $t = substr( $t, $cpos + 1 );
                }
            }
        } else if (0 != $npl || 0 != $opl) {
            # List nesting changed: close what no longer matches, then
            # open what is new
            $cpl = $this->getCommon( $pref, $lastPref );

            while ( $cpl < $opl ) {
                $text .= $this->closeList( $lastPref[$opl-1] );
                --$opl;
            }
            if ( $npl <= $cpl && $cpl > 0 ) {
                $text .= $this->nextItem( $pref[$cpl-1] );
            }
            while ( $npl > $cpl ) {
                $char = substr( $pref, $cpl, 1 );
                $text .= $this->openList( $char );

                if ( ";" == $char ) {
                    $cpos = strpos( $t, ":" );
                    if ( ! ( false === $cpos ) ) {
                        $term = substr( $t, 0, $cpos );
                        $text .= $term . $this->nextItem( ":" );
                        $t = substr( $t, $cpos + 1 );
                    }
                }
                ++$cpl;
            }
            $lastPref = $pref2;
        }
        if ( 0 == $npl ) { # No prefix--go to paragraph mode
            if ( preg_match(
                "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
                $text .= $this->closeParagraph();
                $inBlockElem = true;
            }
            if ( ! $inBlockElem ) {
                # substr() instead of $t{0}: the line may be empty, and
                # the curly-brace offset syntax is gone from current PHP
                if ( " " == substr( $t, 0, 1 ) ) {
                    $newSection = "pre";
                    # $t = wfEscapeHTML( $t );
                }
                else { $newSection = "p"; }

                if ( 0 == strcmp( "", trim( $oLine ) ) ) {
                    # Blank line: always start a new section
                    $text .= $this->closeParagraph();
                    $text .= "<" . $newSection . ">";
                } else if ( 0 != strcmp( $this->mLastSection,
                    $newSection ) ) {
                    $text .= $this->closeParagraph();
                    if ( 0 != strcmp( "p", $newSection ) ) {
                        $text .= "<" . $newSection . ">";
                    }
                }
                $this->mLastSection = $newSection;
            }
            if ( $inBlockElem &&
                preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
                $inBlockElem = false;
            }
        }
        $text .= $t;
    }
    # Close whatever lists the last line left open
    while ( $npl ) {
        $text .= $this->closeList( $pref2[$npl-1] );
        --$npl;
    }
    if ( "" != $this->mLastSection ) {
        if ( "p" != $this->mLastSection ) {
            $text .= "</" . $this->mLastSection . ">";
        }
        $this->mLastSection = "";
    }
    wfProfileOut( $fname );
    return $text;
}
# Substitutes magic-word variables in $text: the current date/time words,
# the number of articles, and the MSG / MSGNW forms that take a parameter.
# mContainsOldMagic / mContainsNewMagic are increased when a substitution
# took place. Returns the converted text.
/* private */ function replaceVariables( $text )
{
    global $wgLang, $wgCurOut;
    # Profiling label now names this class, matching "Parser::parse"
    $fname = "Parser::replaceVariables";
    wfProfileIn( $fname );

    $magic = array();

    # Basic variables
    # See Language.php for the definition of each magic word
    # As with sigs, this uses the server's local time -- ensure
    # this is appropriate for your audience!

    $magic[MAG_CURRENTMONTH] = date( "m" );
    $magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
    $magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
    $magic[MAG_CURRENTDAY] = date("j");
    $magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
    $magic[MAG_CURRENTYEAR] = date( "Y" );
    $magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );

    $this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);

    # Only count the articles when the word actually occurs
    $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
    if ( $mw->match( $text ) ) {
        $v = wfNumberOfArticles();
        $text = $mw->replace( $v, $text );
        if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
    }

    # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
    # The callbacks are at the bottom of this file
    $wgCurOut = $this;
    $mw =& MagicWord::get( MAG_MSG );
    $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
    if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }

    $mw =& MagicWord::get( MAG_MSGNW );
    $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
    if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }

    wfProfileOut( $fname );
    return $text;
}
# Cleans up HTML, removes dangerous tags and attributes
#
# HTML comments are removed. The text is then split at every "<"; a piece
# that starts with a whitelisted, correctly nested tag is kept (with its
# attributes filtered by fixTagAttributes()), any other piece has its "<"
# and ">" escaped. Tags still open at the end are closed. Returns the
# cleaned text.
/* private */ function removeHTMLtags( $text )
{
    # Profiling label now names this class, matching "Parser::parse"
    $fname = "Parser::removeHTMLtags";
    wfProfileIn( $fname );
    $htmlpairs = array( # Tags that must be closed
        "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
        "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
        "strike", "strong", "tt", "var", "div", "center",
        "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
        "ruby", "rt" , "rb" , "rp"
    );
    $htmlsingle = array(
        "br", "p", "hr", "li", "dt", "dd"
    );
    $htmlnest = array( # Tags that can be nested--??
        "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
        "dl", "font", "big", "small", "sub", "sup"
    );
    $tabletags = array( # Can only appear inside table
        "td", "th", "tr"
    );

    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    # Remove HTML comments
    $text = preg_replace( "/<!--.*-->/sU", "", $text );

    $bits = explode( "<", $text );
    $text = array_shift( $bits );
    # $tagstack: tags open in the current table context;
    # $tablestack: the tag stacks saved while inside nested tables
    $tagstack = array(); $tablestack = array();

    foreach ( $bits as $x ) {
        # A piece that does not look like a tag leaves all parts empty, so
        # it falls through to the escaping at the bottom. (This replaces
        # temporarily silencing error_reporting() around a list() on a
        # possibly empty match array.)
        if ( preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
            $x, $regs ) ) {
            list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
        } else {
            $slash = $t = $params = $brace = $rest = "";
        }

        $badtag = 0 ;
        if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
            # Check our stack
            if ( $slash ) {
                # Closing a tag...
                if ( ! in_array( $t, $htmlsingle ) &&
                    ( $ot = array_pop( $tagstack ) ) != $t ) {
                    # Does not match the innermost open tag: undo the pop
                    array_push( $tagstack, $ot );
                    $badtag = 1;
                } else {
                    if ( $t == "table" ) {
                        $tagstack = array_pop( $tablestack );
                    }
                    $newparams = "";
                }
            } else {
                # Keep track for later
                if ( in_array( $t, $tabletags ) &&
                    ! in_array( "table", $tagstack ) ) {
                    $badtag = 1;
                } else if ( in_array( $t, $tagstack ) &&
                    ! in_array ( $t , $htmlnest ) ) {
                    $badtag = 1 ;
                } else if ( ! in_array( $t, $htmlsingle ) ) {
                    if ( $t == "table" ) {
                        # A table starts a fresh nesting context
                        array_push( $tablestack, $tagstack );
                        $tagstack = array();
                    }
                    array_push( $tagstack, $t );
                }
                # Strip non-approved attributes from the tag
                $newparams = $this->fixTagAttributes($params);
            }

            if ( ! $badtag ) {
                $rest = str_replace( ">", "&gt;", $rest );
                $text .= "<$slash$t $newparams$brace$rest";
                continue;
            }
        }
        $text .= "&lt;" . str_replace( ">", "&gt;", $x);
    }
    # Close off any remaining tags
    while ( $t = array_pop( $tagstack ) ) {
        $text .= "</$t>\n";
        if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
    }
    wfProfileOut( $fname );
    return $text;
}
/**
 * Post-process the headings (<h1>..<h6>) found in already-rendered page HTML.
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Reads the globals $wgUser (options, skin), $wgTitle (edit permission, page
 * name) and $wpPreview (edit links are suppressed whenever it is set).
 *
 * @param string $text HTML to process
 * @return string the HTML with every heading rewritten and, where applicable,
 *                the table of contents and edit links inserted
 */
/* private */ function formatHeadings( $text )
{
	global $wgUser, $wgTitle, $wpPreview;

	$nh = $wgUser->getOption( "numberheadings" );
	$st = $wgUser->getOption( "showtoc" );
	if ( !$wgTitle->userCanEdit() ) {
		$es = 0;
		$esr = 0;
	} else {
		$es = $wgUser->getID() && $wgUser->getOption( "editsection" );
		$esr = $wgUser->getID() && $wgUser->getOption( "editsectiononrightclick" );
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if ( $esw->matchAndRemove( $text ) ) {
		$es = 0;
	}

	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if ( $mw->matchAndRemove( $text ) ) {
		$st = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if ( $wgTitle->getPrefixedText() == wfMsg( "mainpage" ) ) {
		$st = 0;
	}

	# We need this to perform operations on the HTML
	$sk = $wgUser->getSkin();

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links
	# $matches[1] = level digit, $matches[2] = rest of the opening tag
	# (attributes plus the closing ">"), $matches[3] = heading contents
	preg_match_all( "/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i", $text, $matches );

	# Every accumulator is initialised up front; they used to be read before
	# their first assignment, which relied on PHP treating undefined as ""/0.
	$c = 0;               # headline counter
	$level = 0;           # level of the current headline
	$prevlevel = 0;       # level of the previous headline, 0 = none yet
	$toclevel = 0;        # current TOC indentation depth
	$toclines = 0;        # number of TOC entries, set once the loop is done
	$toc = "";
	$numbering = "";
	$dot = 0;
	$h = array();         # per-level headline counters
	$refer = array();
	$refers = array();    # canonized headline => number of occurrences
	$refcount = array();
	$head = array();      # rebuilt headline HTML, by headline index
	$full = "";

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	foreach ( $matches[3] as $headline ) {
		if ( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$c];
		$head[$c] = "";

		if ( ( $nh || $st ) && $prevlevel && $level > $prevlevel ) {
			$h[$level] = 0; // reset when we enter a new level
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if ( ( $nh || $st ) && $level < $prevlevel ) {
			$h[$level + 1] = 0; // reset when we step back a level
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}

		// count number of headlines for each level
		if ( !isset( $h[$level] ) ) {
			$h[$level] = 0;
		}
		$h[$level]++;

		if ( $nh || $st ) {
			// Build the dotted number ("2.1.3") from the non-zero counters
			// of every level up to the current one
			for ( $i = 1; $i <= $level; $i++ ) {
				if ( !empty( $h[$i] ) ) {
					if ( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $h[$i];
					$dot = 1;
				}
			}
		}

		// The canonized header is a version of the header text safe to use for links
		$canonized_headline = preg_replace( "/<.*?>/", "", $headline ); // strip out HTML
		$tocline = trim( $canonized_headline );
		$canonized_headline = str_replace( '"', "", $canonized_headline );
		$canonized_headline = str_replace( " ", "_", trim( $canonized_headline ) );
		$refer[$c] = $canonized_headline;
		// count how many in assoc. array so we can track dupes in anchors
		if ( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$c] = $refers[$canonized_headline];

		// Prepend the number to the heading text
		if ( $nh || $st ) {
			$tocline = $numbering . " " . $tocline;

			// Don't number the heading if it is the only one (looks silly)
			if ( $nh && count( $matches[3] ) > 1 ) {
				// the two are different if the line contains a link
				$headline = $numbering . " " . $headline;
			}
		}

		// Create the anchor for linking from the TOC to the section;
		// repeated headlines get a "_<n>" suffix to stay unique
		$anchor = $canonized_headline;
		if ( $refcount[$c] > 1 ) {
			$anchor .= "_" . $refcount[$c];
		}
		if ( $st ) {
			$toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
		}
		if ( $es && !isset( $wpPreview ) ) {
			$head[$c] .= $sk->editSectionLink( $c + 1 );
		}

		// Put it all together
		$head[$c] .= "<h" . $level . $matches[2][$c]
			. "<a name=\"" . $anchor . "\">"
			. $headline
			. "</a>"
			. "</h" . $level . ">";

		// Add the edit section link
		if ( $esr && !isset( $wpPreview ) ) {
			$head[$c] = $sk->editSectionScript( $c + 1, $head[$c] );
		}

		$numbering = "";
		$c++;
		$dot = 0;
	}

	if ( $st ) {
		$toclines = $c;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	// split up and insert constructed headlines
	$blocks = preg_split( "/<H[1-6].*?>.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach ( $blocks as $block ) {
		if ( $es && !isset( $wpPreview ) && $c > 0 && $i == 0 ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled
			$full .= $sk->editSectionLink( 0 );
		}
		$full .= $block;
		if ( $st && $toclines > 3 && !$i ) {
			# Let's add a top anchor just in case we want to link to the top of the page
			$full = "<a name=\"top\"></a>" . $full . $toc;
		}

		# There is one more block than there are headlines, so the last
		# block has no headline to append
		if ( isset( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
/**
 * Turn every "ISBN <number>" occurrence into a link to the Booksources
 * special page.
 *
 * The number is the longest run of digits, hyphens and upper-case letters that
 * follows "ISBN " (after optional extra spaces). Hyphens are removed from the
 * value passed in the link's query string but kept in the link text. If no
 * such run follows, or it consists of hyphens only, the text is left as it was.
 *
 * @param string $text text to scan
 * @return string the text with ISBN references replaced by links
 */
/* private */ function magicISBN( $text )
{
	global $wgLang;

	# Literal, case-sensitive split; the delimiter has no regex meaning, so
	# explode() does the same job as the old split() did.
	$a = explode( "ISBN ", $text );
	if ( count( $a ) < 2 ) {
		return $text;
	}
	$text = array_shift( $a );
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	foreach ( $a as $x ) {
		# Measure the leading runs with strspn() rather than peeling off one
		# character at a time: this cannot read past the end of the string
		# when the ISBN is the last thing in the text.
		$nblank = strspn( $x, " " );
		$blank = str_repeat( " ", $nblank );
		$x = (string)substr( $x, $nblank );

		$nvalid = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $nvalid );
		$x = (string)substr( $x, $nvalid );

		$num = str_replace( "-", "", $isbn );

		if ( "" === $num ) {
			# Not an ISBN after all. Put back everything that was consumed,
			# including a hyphen-only run, so the text is unchanged.
			$text .= "ISBN $blank$isbn$x";
		} else {
			$text .= "<a href=\"" . wfLocalUrlE( $wgLang->specialPage(
				"Booksources" ), "isbn={$num}" ) . "\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	return $text;
}
/**
 * Placeholder for RFC reference handling: currently returns the text as is.
 *
 * @param string $text text to scan
 * @return string the unmodified text
 */
/* private */ function magicRFC( $text )
{
	return $text;
}
/**
 * Value holder for the result of a parse: the rendered text plus the
 * language links, category links and "old magic" flag collected with it.
 */
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

	/**
	 * @param string $text             rendered text
	 * @param array  $languageLinks    language links
	 * @param array  $categoryLinks    category links
	 * @param bool   $containsOldMagic value returned by containsOldMagic()
	 */
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators. Each returns whatever wfSetVar() returns -- presumably the
	# previous value of the field; confirm against wfSetVar().
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
}
# Regex callbacks, used in OutputPage::replaceVariables

# Just get rid of the dangerous stuff
# Necessary because replaceVariables is called after removeHTMLtags,
# and message text can come from any user
#
# $matches is the regex match array; $matches[1] is the message key.
# Returns the message text with disallowed HTML removed and internal links
# replaced. The link cache is suspended while those links are processed,
# and the MediaWiki-namespace page for the message is then added to it.
function wfReplaceMsgVar( $matches ) {
	global $wgCurOut, $wgLinkCache;
	$text = $wgCurOut->removeHTMLtags( wfMsg( $matches[1] ) );
	$wgLinkCache->suspend();
	$text = $wgCurOut->replaceInternalLinks( $text );
	$wgLinkCache->resume();
	$wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
	return $text;
}
# Effective <nowiki></nowiki>
# Not real <nowiki> because this is called after nowiki sections are processed
#
# $matches is the regex match array; $matches[1] is the message key.
# Returns the message text passed through wfEscapeWikiText(), and adds the
# MediaWiki-namespace page for the message to the link cache.
function wfReplaceMsgnwVar( $matches ) {
	global $wgLinkCache;
	$text = wfEscapeWikiText( wfMsg( $matches[1] ) );
	$wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
	return $text;
}