3 class PreprocessorTest
extends MediaWikiTestCase
{
4 var $mTitle = 'Page title';
8 protected function setUp() {
9 global $wgParserConf, $wgContLang;
11 $this->mOptions
= ParserOptions
::newFromUserAndLang( new User
, $wgContLang );
12 $name = isset( $wgParserConf['preprocessorClass'] ) ?
$wgParserConf['preprocessorClass'] : 'Preprocessor_DOM';
14 $this->mPreprocessor
= new $name( $this );
17 function getStripList() {
18 return array( 'gallery', 'display map' /* Used by Maps, see r80025 CR */, '/foo' );
21 public static function provideCases() {
23 array( "Foo", "<root>Foo</root>" ),
24 array( "<!-- Foo -->", "<root><comment><!-- Foo --></comment></root>" ),
25 array( "<!-- Foo --><!-- Bar -->", "<root><comment><!-- Foo --></comment><comment><!-- Bar --></comment></root>" ),
26 array( "<!-- Foo --> <!-- Bar -->", "<root><comment><!-- Foo --></comment> <comment><!-- Bar --></comment></root>" ),
27 array( "<!-- Foo --> \n <!-- Bar -->", "<root><comment><!-- Foo --></comment> \n <comment><!-- Bar --></comment></root>" ),
28 array( "<!-- Foo --> \n <!-- Bar -->\n", "<root><comment><!-- Foo --></comment> \n<comment> <!-- Bar -->\n</comment></root>" ),
29 array( "<!-- Foo --> <!-- Bar -->\n", "<root><comment><!-- Foo --></comment> <comment><!-- Bar --></comment>\n</root>" ),
30 array( "<!-->Bar", "<root><comment><!-->Bar</comment></root>" ),
31 array( "<!-- Comment -- comment", "<root><comment><!-- Comment -- comment</comment></root>" ),
32 array( "== Foo ==\n <!-- Bar -->\n== Baz ==\n", "<root><h level=\"2\" i=\"1\">== Foo ==</h>\n<comment> <!-- Bar -->\n</comment><h level=\"2\" i=\"2\">== Baz ==</h>\n</root>" ),
33 array( "<gallery/>", "<root><ext><name>gallery</name><attr></attr></ext></root>" ),
34 array( "Foo <gallery/> Bar", "<root>Foo <ext><name>gallery</name><attr></attr></ext> Bar</root>" ),
35 array( "<gallery></gallery>", "<root><ext><name>gallery</name><attr></attr><inner></inner><close></gallery></close></ext></root>" ),
36 array( "<foo> <gallery></gallery>", "<root><foo> <ext><name>gallery</name><attr></attr><inner></inner><close></gallery></close></ext></root>" ),
37 array( "<foo> <gallery><gallery></gallery>", "<root><foo> <ext><name>gallery</name><attr></attr><inner><gallery></inner><close></gallery></close></ext></root>" ),
38 array( "<noinclude> Foo bar </noinclude>", "<root><ignore><noinclude></ignore> Foo bar <ignore></noinclude></ignore></root>" ),
39 array( "<noinclude>\n{{Foo}}\n</noinclude>", "<root><ignore><noinclude></ignore>\n<template lineStart=\"1\"><title>Foo</title></template>\n<ignore></noinclude></ignore></root>" ),
40 array( "<noinclude>\n{{Foo}}\n</noinclude>\n", "<root><ignore><noinclude></ignore>\n<template lineStart=\"1\"><title>Foo</title></template>\n<ignore></noinclude></ignore>\n</root>" ),
41 array( "<gallery>foo bar", "<root><ext><name>gallery</name><attr></attr><inner>foo bar</inner></ext></root>" ),
42 array( "<{{foo}}>", "<root><<template><title>foo</title></template>></root>" ),
43 array( "<{{{foo}}}>", "<root><<tplarg><title>foo</title></tplarg>></root>" ),
44 array( "<gallery></gallery</gallery>", "<root><ext><name>gallery</name><attr></attr><inner></gallery</inner><close></gallery></close></ext></root>" ),
45 array( "=== Foo === ", "<root><h level=\"3\" i=\"1\">=== Foo === </h></root>" ),
46 array( "==<!-- -->= Foo === ", "<root><h level=\"2\" i=\"1\">==<comment><!-- --></comment>= Foo === </h></root>" ),
47 array( "=== Foo ==<!-- -->= ", "<root><h level=\"1\" i=\"1\">=== Foo ==<comment><!-- --></comment>= </h></root>" ),
48 array( "=== Foo ===<!-- -->\n", "<root><h level=\"3\" i=\"1\">=== Foo ===<comment><!-- --></comment></h>\n</root>" ),
49 array( "=== Foo ===<!-- --> <!-- -->\n", "<root><h level=\"3\" i=\"1\">=== Foo ===<comment><!-- --></comment> <comment><!-- --></comment></h>\n</root>" ),
50 array( "== Foo ==\n== Bar == \n", "<root><h level=\"2\" i=\"1\">== Foo ==</h>\n<h level=\"2\" i=\"2\">== Bar == </h>\n</root>" ),
51 array( "===========", "<root><h level=\"5\" i=\"1\">===========</h></root>" ),
52 array( "Foo\n=\n==\n=\n", "<root>Foo\n=\n==\n=\n</root>" ),
53 array( "{{Foo}}", "<root><template><title>Foo</title></template></root>" ),
54 array( "\n{{Foo}}", "<root>\n<template lineStart=\"1\"><title>Foo</title></template></root>" ),
55 array( "{{Foo|bar}}", "<root><template><title>Foo</title><part><name index=\"1\" /><value>bar</value></part></template></root>" ),
56 array( "{{Foo|bar}}a", "<root><template><title>Foo</title><part><name index=\"1\" /><value>bar</value></part></template>a</root>" ),
57 array( "{{Foo|bar|baz}}", "<root><template><title>Foo</title><part><name index=\"1\" /><value>bar</value></part><part><name index=\"2\" /><value>baz</value></part></template></root>" ),
58 array( "{{Foo|1=bar}}", "<root><template><title>Foo</title><part><name>1</name>=<value>bar</value></part></template></root>" ),
59 array( "{{Foo|=bar}}", "<root><template><title>Foo</title><part><name></name>=<value>bar</value></part></template></root>" ),
60 array( "{{Foo|bar=baz}}", "<root><template><title>Foo</title><part><name>bar</name>=<value>baz</value></part></template></root>" ),
61 array( "{{Foo|{{bar}}=baz}}", "<root><template><title>Foo</title><part><name><template><title>bar</title></template></name>=<value>baz</value></part></template></root>" ),
62 array( "{{Foo|1=bar|baz}}", "<root><template><title>Foo</title><part><name>1</name>=<value>bar</value></part><part><name index=\"1\" /><value>baz</value></part></template></root>" ),
63 array( "{{Foo|1=bar|2=baz}}", "<root><template><title>Foo</title><part><name>1</name>=<value>bar</value></part><part><name>2</name>=<value>baz</value></part></template></root>" ),
64 array( "{{Foo|bar|foo=baz}}", "<root><template><title>Foo</title><part><name index=\"1\" /><value>bar</value></part><part><name>foo</name>=<value>baz</value></part></template></root>" ),
65 array( "{{{1}}}", "<root><tplarg><title>1</title></tplarg></root>" ),
66 array( "{{{1|}}}", "<root><tplarg><title>1</title><part><name index=\"1\" /><value></value></part></tplarg></root>" ),
67 array( "{{{Foo}}}", "<root><tplarg><title>Foo</title></tplarg></root>" ),
68 array( "{{{Foo|}}}", "<root><tplarg><title>Foo</title><part><name index=\"1\" /><value></value></part></tplarg></root>" ),
69 array( "{{{Foo|bar|baz}}}", "<root><tplarg><title>Foo</title><part><name index=\"1\" /><value>bar</value></part><part><name index=\"2\" /><value>baz</value></part></tplarg></root>" ),
70 array( "{<!-- -->{Foo}}", "<root>{<comment><!-- --></comment>{Foo}}</root>" ),
71 array( "{{{{Foobar}}}}", "<root>{<tplarg><title>Foobar</title></tplarg>}</root>" ),
72 array( "{{{ {{Foo}} }}}", "<root><tplarg><title> <template><title>Foo</title></template> </title></tplarg></root>" ),
73 array( "{{ {{{Foo}}} }}", "<root><template><title> <tplarg><title>Foo</title></tplarg> </title></template></root>" ),
74 array( "{{{{{Foo}}}}}", "<root><template><title><tplarg><title>Foo</title></tplarg></title></template></root>" ),
75 array( "{{{{{Foo}} }}}", "<root><tplarg><title><template><title>Foo</title></template> </title></tplarg></root>" ),
76 array( "{{{{{{Foo}}}}}}", "<root><tplarg><title><tplarg><title>Foo</title></tplarg></title></tplarg></root>" ),
77 array( "{{{{{{Foo}}}}}", "<root>{<template><title><tplarg><title>Foo</title></tplarg></title></template></root>" ),
78 array( "[[[Foo]]", "<root>[[[Foo]]</root>" ),
79 array( "{{Foo|[[[[bar]]|baz]]}}", "<root><template><title>Foo</title><part><name index=\"1\" /><value>[[[[bar]]|baz]]</value></part></template></root>" ), // This test is important, since it means the difference between having the [[ rule stacked or not
80 array( "{{Foo|[[[[bar]|baz]]}}", "<root>{{Foo|[[[[bar]|baz]]}}</root>" ),
81 array( "{{Foo|Foo [[[[bar]|baz]]}}", "<root>{{Foo|Foo [[[[bar]|baz]]}}</root>" ),
82 array( "Foo <display map>Bar</display map >Baz", "<root>Foo <ext><name>display map</name><attr></attr><inner>Bar</inner><close></display map ></close></ext>Baz</root>" ),
83 array( "Foo <display map foo>Bar</display map >Baz", "<root>Foo <ext><name>display map</name><attr> foo</attr><inner>Bar</inner><close></display map ></close></ext>Baz</root>" ),
84 array( "Foo <gallery bar=\"baz\" />", "<root>Foo <ext><name>gallery</name><attr> bar="baz" </attr></ext></root>" ),
85 array( "Foo <gallery bar=\"1\" baz=2 />", "<root>Foo <ext><name>gallery</name><attr> bar="1" baz=2 </attr></ext></root>" ),
86 array( "</foo>Foo<//foo>", "<root><ext><name>/foo</name><attr></attr><inner>Foo</inner><close><//foo></close></ext></root>" ), # Worth blacklisting IMHO
87 array( "{{#ifexpr: ({{{1|1}}} = 2) | Foo | Bar }}", "<root><template><title>#ifexpr: (<tplarg><title>1</title><part><name index=\"1\" /><value>1</value></part></tplarg> = 2) </title><part><name index=\"1\" /><value> Foo </value></part><part><name index=\"2\" /><value> Bar </value></part></template></root>" ),
88 array( "{{#if: {{{1|}}} | Foo | {{Bar}} }}", "<root><template><title>#if: <tplarg><title>1</title><part><name index=\"1\" /><value></value></part></tplarg> </title><part><name index=\"1\" /><value> Foo </value></part><part><name index=\"2\" /><value> <template><title>Bar</title></template> </value></part></template></root>" ),
89 array( "{{#if: {{{1|}}} | Foo | [[Bar]] }}", "<root><template><title>#if: <tplarg><title>1</title><part><name index=\"1\" /><value></value></part></tplarg> </title><part><name index=\"1\" /><value> Foo </value></part><part><name index=\"2\" /><value> [[Bar]] </value></part></template></root>" ),
90 array( "{{#if: {{{1|}}} | [[Foo]] | Bar }}", "<root><template><title>#if: <tplarg><title>1</title><part><name index=\"1\" /><value></value></part></tplarg> </title><part><name index=\"1\" /><value> [[Foo]] </value></part><part><name index=\"2\" /><value> Bar </value></part></template></root>" ),
91 array( "{{#if: {{{1|}}} | 1 | {{#if: {{{1|}}} | 2 | 3 }} }}", "<root><template><title>#if: <tplarg><title>1</title><part><name index=\"1\" /><value></value></part></tplarg> </title><part><name index=\"1\" /><value> 1 </value></part><part><name index=\"2\" /><value> <template><title>#if: <tplarg><title>1</title><part><name index=\"1\" /><value></value></part></tplarg> </title><part><name index=\"1\" /><value> 2 </value></part><part><name index=\"2\" /><value> 3 </value></part></template> </value></part></template></root>" ),
92 array( "{{ {{Foo}}", "<root>{{ <template><title>Foo</title></template></root>" ),
93 array( "{{Foobar {{Foo}} {{Bar}} {{Baz}} ", "<root>{{Foobar <template><title>Foo</title></template> <template><title>Bar</title></template> <template><title>Baz</title></template> </root>" ),
94 array( "[[Foo]] |", "<root>[[Foo]] |</root>" ),
95 array( "{{Foo|Bar|", "<root>{{Foo|Bar|</root>" ),
96 array( "[[Foo]", "<root>[[Foo]</root>" ),
97 array( "[[Foo|Bar]", "<root>[[Foo|Bar]</root>" ),
98 array( "{{Foo| [[Bar] }}", "<root>{{Foo| [[Bar] }}</root>" ),
99 array( "{{Foo| [[Bar|Baz] }}", "<root>{{Foo| [[Bar|Baz] }}</root>" ),
100 array( "{{Foo|bar=[[baz]}}", "<root>{{Foo|bar=[[baz]}}</root>" ),
101 array( "{{foo|", "<root>{{foo|</root>" ),
102 array( "{{foo|}", "<root>{{foo|}</root>" ),
103 array( "{{foo|} }}", "<root><template><title>foo</title><part><name index=\"1\" /><value>} </value></part></template></root>" ),
104 array( "{{foo|bar=|}", "<root>{{foo|bar=|}</root>" ),
105 array( "{{Foo|} Bar=", "<root>{{Foo|} Bar=</root>" ),
106 array( "{{Foo|} Bar=}}", "<root><template><title>Foo</title><part><name>} Bar</name>=<value></value></part></template></root>" ),
107 /* array( file_get_contents( __DIR__ . '/QuoteQuran.txt' ), file_get_contents( __DIR__ . '/QuoteQuranExpanded.txt' ) ), */
112 * Get XML preprocessor tree from the preprocessor (which may not be the
113 * native XML-based one).
115 * @param string $wikiText
118 function preprocessToXml( $wikiText ) {
119 if ( method_exists( $this->mPreprocessor
, 'preprocessToXml' ) ) {
120 return $this->normalizeXml( $this->mPreprocessor
->preprocessToXml( $wikiText ) );
123 $dom = $this->mPreprocessor
->preprocessToObj( $wikiText );
124 if ( is_callable( array( $dom, 'saveXML' ) ) ) {
125 return $dom->saveXML();
127 return $this->normalizeXml( $dom->__toString() );
132 * Normalize XML string to the form that a DOMDocument saves out.
137 function normalizeXml( $xml ) {
138 return preg_replace( '!<([a-z]+)/>!', '<$1></$1>', str_replace( ' />', '/>', $xml ) );
142 * @dataProvider provideCases
144 function testPreprocessorOutput( $wikiText, $expectedXml ) {
145 $this->assertEquals( $this->normalizeXml( $expectedXml ), $this->preprocessToXml( $wikiText ) );
149 * These are more complex test cases taken out of wiki articles.
151 public static function provideFiles() {
153 array( "QuoteQuran" ), # http://en.wikipedia.org/w/index.php?title=Template:QuoteQuran/sandbox&oldid=237348988 GFDL + CC-BY-SA by Striver
154 array( "Factorial" ), # http://en.wikipedia.org/w/index.php?title=Template:Factorial&oldid=98548758 GFDL + CC-BY-SA by Polonium
155 array( "All_system_messages" ), # http://tl.wiktionary.org/w/index.php?title=Suleras:All_system_messages&oldid=2765 GPL text generated by MediaWiki
156 array( "Fundraising" ), # http://tl.wiktionary.org/w/index.php?title=MediaWiki:Sitenotice&oldid=5716 GFDL + CC-BY-SA, copied there by Sky Harbor.
157 array( "NestedTemplates" ), # bug 27936
162 * @dataProvider provideFiles
164 function testPreprocessorOutputFiles( $filename ) {
165 $folder = __DIR__
. "/../../../parser/preprocess";
166 $wikiText = file_get_contents( "$folder/$filename.txt" );
167 $output = $this->preprocessToXml( $wikiText );
169 $expectedFilename = "$folder/$filename.expected";
170 if ( file_exists( $expectedFilename ) ) {
171 $expectedXml = $this->normalizeXml( file_get_contents( $expectedFilename ) );
172 $this->assertEquals( $expectedXml, $output );
174 $tempFilename = tempnam( $folder, "$filename." );
175 file_put_contents( $tempFilename, $output );
176 $this->markTestIncomplete( "File $expectedFilename missing. Output stored as $tempFilename" );
181 * Tests from Bug 28642 ยท https://bugzilla.wikimedia.org/28642
183 public static function provideHeadings() {
184 return array( /* These should become headings: */
185 array( "== h ==<!--c1-->", "<root><h level=\"2\" i=\"1\">== h ==<comment><!--c1--></comment></h></root>" ),
186 array( "== h == <!--c1-->", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment></h></root>" ),
187 array( "== h ==<!--c1--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment><!--c1--></comment> </h></root>" ),
188 array( "== h == <!--c1--> ", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment> </h></root>" ),
189 array( "== h ==<!--c1--><!--c2-->", "<root><h level=\"2\" i=\"1\">== h ==<comment><!--c1--></comment><comment><!--c2--></comment></h></root>" ),
190 array( "== h == <!--c1--><!--c2-->", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment><comment><!--c2--></comment></h></root>" ),
191 array( "== h ==<!--c1--><!--c2--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment><!--c1--></comment><comment><!--c2--></comment> </h></root>" ),
192 array( "== h == <!--c1--><!--c2--> ", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment><comment><!--c2--></comment> </h></root>" ),
193 array( "== h == <!--c1--> <!--c2-->", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment> <comment><!--c2--></comment></h></root>" ),
194 array( "== h ==<!--c1--> <!--c2--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment><!--c1--></comment> <comment><!--c2--></comment> </h></root>" ),
195 array( "== h == <!--c1--> <!--c2--> ", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment> <comment><!--c2--></comment> </h></root>" ),
196 array( "== h ==<!--c1--><!--c2--><!--c3-->", "<root><h level=\"2\" i=\"1\">== h ==<comment><!--c1--></comment><comment><!--c2--></comment><comment><!--c3--></comment></h></root>" ),
197 array( "== h ==<!--c1--> <!--c2--><!--c3-->", "<root><h level=\"2\" i=\"1\">== h ==<comment><!--c1--></comment> <comment><!--c2--></comment><comment><!--c3--></comment></h></root>" ),
198 array( "== h ==<!--c1--><!--c2--> <!--c3-->", "<root><h level=\"2\" i=\"1\">== h ==<comment><!--c1--></comment><comment><!--c2--></comment> <comment><!--c3--></comment></h></root>" ),
199 array( "== h ==<!--c1--> <!--c2--> <!--c3-->", "<root><h level=\"2\" i=\"1\">== h ==<comment><!--c1--></comment> <comment><!--c2--></comment> <comment><!--c3--></comment></h></root>" ),
200 array( "== h == <!--c1--><!--c2--><!--c3-->", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment><comment><!--c2--></comment><comment><!--c3--></comment></h></root>" ),
201 array( "== h == <!--c1--> <!--c2--><!--c3-->", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment> <comment><!--c2--></comment><comment><!--c3--></comment></h></root>" ),
202 array( "== h == <!--c1--><!--c2--> <!--c3-->", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment><comment><!--c2--></comment> <comment><!--c3--></comment></h></root>" ),
203 array( "== h == <!--c1--> <!--c2--> <!--c3-->", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment> <comment><!--c2--></comment> <comment><!--c3--></comment></h></root>" ),
204 array( "== h ==<!--c1--><!--c2--><!--c3--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment><!--c1--></comment><comment><!--c2--></comment><comment><!--c3--></comment> </h></root>" ),
205 array( "== h ==<!--c1--> <!--c2--><!--c3--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment><!--c1--></comment> <comment><!--c2--></comment><comment><!--c3--></comment> </h></root>" ),
206 array( "== h ==<!--c1--><!--c2--> <!--c3--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment><!--c1--></comment><comment><!--c2--></comment> <comment><!--c3--></comment> </h></root>" ),
207 array( "== h ==<!--c1--> <!--c2--> <!--c3--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment><!--c1--></comment> <comment><!--c2--></comment> <comment><!--c3--></comment> </h></root>" ),
208 array( "== h == <!--c1--><!--c2--><!--c3--> ", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment><comment><!--c2--></comment><comment><!--c3--></comment> </h></root>" ),
209 array( "== h == <!--c1--> <!--c2--><!--c3--> ", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment> <comment><!--c2--></comment><comment><!--c3--></comment> </h></root>" ),
210 array( "== h == <!--c1--><!--c2--> <!--c3--> ", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment><comment><!--c2--></comment> <comment><!--c3--></comment> </h></root>" ),
211 array( "== h == <!--c1--> <!--c2--> <!--c3--> ", "<root><h level=\"2\" i=\"1\">== h == <comment><!--c1--></comment> <comment><!--c2--></comment> <comment><!--c3--></comment> </h></root>" ),
213 /* These are not working: */
214 array( "== h ==<!--c1--> <!--c2-->", "<root>== h ==<comment><!--c1--></comment> <comment><!--c2--></comment></root>" ),
215 array( "== h == <!--c1--> <!--c2-->", "<root>== h == <comment><!--c1--></comment> <comment><!--c2--></comment></root>" ),
216 array( "== h ==<!--c1--> <!--c2--> ", "<root>== h ==<comment><!--c1--></comment> <comment><!--c2--></comment> </root>" ),
217 array( "== h == x <!--c1--><!--c2--><!--c3--> ", "<root>== h == x <comment><!--c1--></comment><comment><!--c2--></comment><comment><!--c3--></comment> </root>" ),
218 array( "== h ==<!--c1--> x <!--c2--><!--c3--> ", "<root>== h ==<comment><!--c1--></comment> x <comment><!--c2--></comment><comment><!--c3--></comment> </root>" ),
219 array( "== h ==<!--c1--><!--c2--><!--c3--> x ", "<root>== h ==<comment><!--c1--></comment><comment><!--c2--></comment><comment><!--c3--></comment> x </root>" ),
224 * @dataProvider provideHeadings
226 function testHeadings( $wikiText, $expectedXml ) {
227 $this->assertEquals( $this->normalizeXml( $expectedXml ), $this->preprocessToXml( $wikiText ) );