5 * @subpackage Experimental
9 require_once ('Parser.php');
12 * This should one day become the XML->(X)HTML parser
13 * Based on work by Jan Hidders and Magnus Manske
15 * $wgUseXMLparser = true ;
16 * $wgEnableParserCache = false ;
17 * $wgWiki2xml to the path and executable of the command line version (cli)
18 * in LocalSettings.php
20 * @subpackage Experimental
24 * the base class for an element
26 * @subpackage Experimental
30 var $attrs = array ();
31 var $children = array ();
34 * This finds the ATTRS element and returns the ATTR sub-children as a single string
36 function getSourceAttrs() {
38 foreach ($this->children
as $child) {
39 if (!is_string($child) AND $child->name
== 'ATTRS') {
40 $ret = $child->makeXHTML($parser);
47 * This collects the ATTR thingies for getSourceAttrs()
49 function getTheseAttrs() {
51 foreach ($this->children
as $child) {
52 if (!is_string($child) AND $child->name
== 'ATTR') {
53 $ret[] = $child->attrs
["NAME"]."='".$child->children
[0]."'";
56 return implode(' ', $ret);
59 function fixLinkTails(& $parser, $key) {
61 if (!isset ($this->children
[$k2]))
63 if (!is_string($this->children
[$k2]))
65 if (is_string($this->children
[$key]))
67 if ($this->children
[$key]->name
!= "LINK")
70 $n = $this->children
[$k2];
72 while ($n != '' AND (($n[0] >= 'a' AND $n[0] <= 'z') OR $n[0] == 'ä' OR $n[0] == 'ö' OR $n[0] == 'ü' OR $n[0] == 'ß')) {
76 $this->children
[$k2] = $n;
78 if (count($this->children
[$key]->children
) > 1) {
79 $kl = array_keys($this->children
[$key]->children
);
81 $this->children
[$key]->children
[$kl]->children
[] = $s;
84 $e->name
= "LINKOPTION";
85 $t = $this->children
[$key]->sub_makeXHTML($parser);
86 $e->children
[] = trim($t).$s;
87 $this->children
[$key]->children
[] = $e;
92 * This function generates the XHTML for the entire subtree
94 function sub_makeXHTML(& $parser, $tag = '', $attr = '') {
97 $attr2 = $this->getSourceAttrs();
98 if ($attr != '' AND $attr2 != '')
109 # THIS SHOULD BE DONE IN THE WIKI2XML-PARSER INSTEAD
110 # foreach ( array_keys ( $this->children ) AS $x )
111 # $this->fixLinkTails ( $parser , $x ) ;
113 foreach ($this->children
as $key => $child) {
114 if (is_string($child)) {
116 } elseif ($child->name
!= 'ATTRS') {
117 $ret .= $child->makeXHTML($parser);
121 $ret .= '</'.$tag.">\n";
128 function createInternalLink(& $parser, $target, $display_title, $options) {
130 $skin = $wgUser->getSkin();
131 $tp = explode(':', $target); # tp = target parts
132 $title = ''; # The plain title
133 $language = ''; # The language/meta/etc. part
134 $namespace = ''; # The namespace, if any
135 $subtarget = ''; # The '#' thingy
137 $nt = Title
:: newFromText($target);
138 $fl = strtoupper($this->attrs
['FORCEDLINK']) == 'YES';
140 if ($fl ||
count($tp) == 1) {
141 # Plain and simple case
144 # There's stuff missing here...
145 if ($nt->getNamespace() == NS_IMAGE
) {
146 $options[] = $display_title;
147 return $skin->makeImageLinkObj($nt, implode('|', $options));
154 if ($language != '') {
155 # External link within the WikiMedia project
156 return "{language link}";
158 if ($namespace != '') {
159 # Link to another namespace, check for image/media stuff
160 return "{namespace link}";
162 return $skin->makeLink($target, $display_title);
167 /** @todo document */
168 function makeInternalLink(& $parser) {
171 foreach ($this->children
as $child) {
172 if (is_string($child)) {
173 # This shouldn't be the case!
175 if ($child->name
== 'LINKTARGET') {
176 $target = trim($child->makeXHTML($parser));
178 $option[] = trim($child->makeXHTML($parser));
183 if (count($option) == 0)
184 $option[] = $target; # Create dummy display title
185 $display_title = array_pop($option);
186 return $this->createInternalLink($parser, $target, $display_title, $option);
189 /** @todo document */
190 function getTemplateXHTML($title, $parts, & $parser) {
191 global $wgLang, $wgUser;
192 $skin = $wgUser->getSkin();
193 $ot = $title; # Original title
194 if (count(explode(':', $title)) == 1)
195 $title = $wgLang->getNsText(NS_TEMPLATE
).":".$title;
196 $nt = Title
:: newFromText($title);
197 $id = $nt->getArticleID();
199 # No/non-existing page
200 return $skin->makeBrokenLink($title, $ot);
204 $tv = array (); # Template variables
205 foreach ($parts AS $part) {
207 $x = explode('=', $part, 2);
212 $value = array_pop($x);
215 $art = new Article($nt);
216 $text = $art->getContent(false);
217 $parser->plain_parse($text, true, $tv);
223 * This function actually converts wikiXML into XHTML tags
224 * @todo use switch() !
226 function makeXHTML(& $parser) {
228 $n = $this->name
; # Shortcut
230 if ($n == 'EXTENSION') {
233 $ext = strtoupper($this->attrs
['NAME']);
245 $n = 'UNDERLINED'; # Hey, virtual wiki tag! ;-)
273 unset ($this->attrs
['NAME']); # Cleanup
274 } elseif ($parser->nowiki
> 0) {
275 # No 'real' wiki tags allowed in nowiki section
278 } // $n = 'EXTENSION'
282 $ret .= $this->sub_makeXHTML($parser);
285 $ret .= $this->sub_makeXHTML($parser, 'h'.$this->attrs
['LEVEL']);
288 $ret .= $this->sub_makeXHTML($parser, 'p');
291 $ret .= $this->sub_makeXHTML($parser, 'strong');
294 $ret .= $this->sub_makeXHTML($parser, 'em');
297 # These don't exist as wiki markup
299 $ret .= $this->sub_makeXHTML($parser, 'u');
302 $ret .= $this->sub_makeXHTML($parser, 'strike');
307 # Comments are parsed out
314 $ret .= $this->makeInternalLink($parser);
318 $ret .= $this->sub_makeXHTML($parser);
322 $parts = $this->sub_makeXHTML($parser);
323 $parts = explode('|', $parts);
324 $title = array_shift($parts);
325 $ret .= $this->getTemplateXHTML($title, $parts, & $parser);
329 $x = $this->sub_makeXHTML($parser);
330 if (isset ($parser->mCurrentTemplateOptions
["{$x}"]))
331 $ret .= $parser->mCurrentTemplateOptions
["{$x}"];
334 # Internal use, not generated by wiki2xml parser
336 $ret .= $this->sub_makeXHTML($parser);
340 $ret .= $this->sub_makeXHTML($parser, '');
344 # Unknown HTML extension
345 case 'EXTENSION': # This is currently a dummy!!!
346 $ext = $this->attrs
['NAME'];
348 $ret .= '<'.$ext.'>';
349 $ret .= $this->sub_makeXHTML($parser);
350 $ret .= '</'.$ext.'> ';
357 $ret .= $this->sub_makeXHTML($parser, 'table');
360 $ret .= $this->sub_makeXHTML($parser, 'tr');
363 $ret .= $this->sub_makeXHTML($parser, 'td');
366 $ret .= $this->sub_makeXHTML($parser, 'th');
369 $ret .= $this->sub_makeXHTML($parser, 'caption');
371 case 'ATTRS': # SPECIAL CASE : returning attributes
372 return $this->getTheseAttrs();
377 if ($parser->mListType
== 'dl')
378 $ret .= $this->sub_makeXHTML($parser, 'dd');
380 $ret .= $this->sub_makeXHTML($parser, 'li');
383 $type = 'ol'; # Default
384 if ($this->attrs
['TYPE'] == 'bullet')
387 if ($this->attrs
['TYPE'] == 'indent')
389 $oldtype = $parser->mListType
;
390 $parser->mListType
= $type;
391 $ret .= $this->sub_makeXHTML($parser, $type);
392 $parser->mListType
= $oldtype;
395 # Something else entirely
397 $ret .= '<'.$n.'>';
398 $ret .= $this->sub_makeXHTML($parser);
399 $ret .= '</'.$n.'> ';
403 $ret = str_replace("\n\n", "\n", $ret);
408 * A function for additional debugging output
412 $ret .= "<li> <b> Name: </b> $this->name </li>\n";
414 $ret .= '<li> <b> Attributes: </b>';
415 foreach ($this->attrs
as $name => $value) {
416 $ret .= "$name => $value; ";
420 foreach ($this->children
as $child) {
421 if (is_string($child)) {
422 $ret .= "<li> $child </li>\n";
424 $ret .= $child->myPrint();
432 $ancStack = array (); // the stack with ancestral elements
434 // START Three global functions needed for parsing, sorry guys
435 /** @todo document */
436 function wgXMLstartElement($parser, $name, $attrs) {
439 $newElem = new element
;
440 $newElem->name
= $name;
441 $newElem->attrs
= $attrs;
443 array_push($ancStack, $newElem);
446 /** @todo document */
447 function wgXMLendElement($parser, $name) {
448 global $ancStack, $rootElem;
449 // pop element off stack
450 $elem = array_pop($ancStack);
451 if (count($ancStack) == 0)
454 // add it to its parent
455 array_push($ancStack[count($ancStack) - 1]->children
, $elem);
458 /** @todo document */
459 function wgXMLcharacterData($parser, $data) {
461 $data = trim($data); // Don't add blank lines, they're no use...
462 // add to parent if parent exists
463 if ($ancStack && $data != "") {
464 array_push($ancStack[count($ancStack) - 1]->children
, $data);
467 // END Three global functions needed for parsing, sorry guys
470 * Here's the class that generates a nice tree
472 * @subpackage Experimental
476 /** @todo document */
477 function & scanFile($filename) {
478 global $ancStack, $rootElem;
479 $ancStack = array ();
481 $xml_parser = xml_parser_create();
482 xml_set_element_handler($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
483 xml_set_character_data_handler($xml_parser, 'wgXMLcharacterData');
484 if (!($fp = fopen($filename, 'r'))) {
485 die('could not open XML input');
487 while ($data = fread($fp, 4096)) {
488 if (!xml_parse($xml_parser, $data, feof($fp))) {
489 die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser)));
492 xml_parser_free($xml_parser);
494 // return the remaining root element we copied in the beginning
498 /** @todo document */
499 function scanString($input) {
500 global $ancStack, $rootElem;
501 $ancStack = array ();
503 $xml_parser = xml_parser_create();
504 xml_set_element_handler($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
505 xml_set_character_data_handler($xml_parser, 'wgXMLcharacterData');
507 if (!xml_parse($xml_parser, $input, true)) {
508 die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser)));
510 xml_parser_free($xml_parser);
512 // return the remaining root element we copied in the beginning
521 * @subpackage Experimental
523 class ParserXML
extends Parser
{
528 var $mTagHooks, $mListType;
530 # Cleared with clearState():
531 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array ();
532 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
535 var $mOptions, $mTitle, $mOutputType, $mTemplates, // cache of already loaded templates, avoids
536 // multiple SQL queries for the same string
537 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
538 // in this path. Used for loop detection.
540 var $nowikicount, $mCurrentTemplateOptions;
549 function ParserXML() {
550 $this->mTemplates
= array ();
551 $this->mTemplatePath
= array ();
552 $this->mTagHooks
= array ();
561 function clearState() {
562 $this->mOutput
= new ParserOutput
;
563 $this->mAutonumber
= 0;
564 $this->mLastSection
= "";
565 $this->mDTopen
= false;
566 $this->mVariables
= false;
567 $this->mIncludeCount
= array ();
568 $this->mStripState
= array ();
569 $this->mArgStack
= array ();
570 $this->mInPre
= false;
574 * Turns the wikitext into XML by calling the external parser
577 function html2xml(& $text) {
580 # generating html2xml command path
582 $a = explode('/', $a);
585 $html2xml = implode('/', $a);
588 $tmpfname = tempnam('/tmp', 'FOO');
589 $handle = fopen($tmpfname, 'w');
590 fwrite($handle, utf8_encode($text));
592 exec($html2xml.' < '.$tmpfname, $a);
593 $text = utf8_decode(implode("\n", $a));
597 /** @todo document */
598 function runXMLparser(& $text) {
601 $this->html2xml($text);
603 $tmpfname = tempnam('/tmp', 'FOO');
604 $handle = fopen($tmpfname, 'w');
605 fwrite($handle, $text);
607 exec($wgWiki2xml.' < '.$tmpfname, $a);
608 $text = utf8_decode(implode("\n", $a));
612 /** @todo document */
613 function plain_parse(& $text, $inline = false, $templateOptions = array ()) {
614 $this->runXMLparser($text);
617 $result = $w->scanString($text);
619 $oldTemplateOptions = $this->mCurrentTemplateOptions
;
620 $this->mCurrentTemplateOptions
= $templateOptions;
622 if ($inline) { # Inline rendering off for templates
623 if (count($result->children
) == 1)
624 $result->children
[0]->name
= 'IGNORE';
628 $text = $result->makeXHTML($this); # No debugging info
630 $text = $result->makeXHTML($this).'<hr>'.$text.'<hr>'.$result->myPrint();
631 $this->mCurrentTemplateOptions
= $oldTemplateOptions;
634 /** @todo document */
635 function parse($text, & $title, $options, $linestart = true, $clearState = true) {
636 $this->plain_parse($text);
637 $this->mOutput
->setText($text);
638 return $this->mOutput
;