rdbms: Rename "memCache" to "memStash" in LBFactory
[mediawiki.git] / tests / phpunit / includes / tidy / BalancerTest.php
blob8a4f662a962d5f0f0fb88194d256629ac95ffef2
<?php
class BalancerTest extends MediaWikiTestCase {

	/**
	 * Anything that needs to happen before your tests should go here.
	 */
	protected function setUp() {
		// Be sure to call the parent setup and teardown functions.
		// This makes sure that all the various cleanup and restorations
		// happen as they should (including the restoration for setMwGlobals).
		parent::setUp();
	}

	/**
	 * Balance one input document and compare the result with the expected
	 * serialization, after rewriting self-closing tags (" />") to plain ">".
	 *
	 * @covers MediaWiki\Tidy\Balancer
	 * @covers MediaWiki\Tidy\BalanceSets
	 * @covers MediaWiki\Tidy\BalanceElement
	 * @covers MediaWiki\Tidy\BalanceStack
	 * @covers MediaWiki\Tidy\BalanceMarker
	 * @covers MediaWiki\Tidy\BalanceActiveFormattingElements
	 * @dataProvider provideBalancerTests
	 *
	 * @param string $description Label used as the assertion failure message
	 * @param string $input HTML passed to Balancer::balance()
	 * @param string $expected Expected balanced HTML
	 * @param bool $useTidy Value for the balancer's 'tidyCompat' option
	 */
	public function testBalancer( $description, $input, $expected, $useTidy ) {
		$balancer = new MediaWiki\Tidy\Balancer( [
			'strict' => false, /* not strict */
			'allowedHtmlElements' => null, /* no sanitization */
			'tidyCompat' => $useTidy, /* standard parser */
			'allowComments' => true, /* comment parsing */
		] );
		$output = $balancer->balance( $input );

		// Ignore self-closing tags
		$output = preg_replace( '/\s*\/>/', '>', $output );

		$this->assertEquals( $expected, $output, $description );
	}

	/**
	 * Build the data sets for testBalancer() from html5lib-tests.json,
	 * dropping the cases that exercise constructs the balancer does not
	 * support, and append the MediaWiki-specific cases.
	 *
	 * @return array[] List of [ description, input, expected, useTidy ]
	 * @throws RuntimeException If the fixture file cannot be read or decoded
	 */
	public static function provideBalancerTests() {
		// Get the tests from html5lib-tests.json. Fail loudly if the fixture
		// is unusable: otherwise the loop below would only emit a warning and
		// almost every test case would silently disappear.
		$fixturePath = __DIR__ . '/html5lib-tests.json';
		$raw = file_get_contents( $fixturePath );
		if ( $raw === false ) {
			throw new RuntimeException( "Unable to read test fixture: $fixturePath" );
		}
		$json = json_decode( $raw, true );
		if ( !is_array( $json ) ) {
			throw new RuntimeException( "Test fixture is not valid JSON: $fixturePath" );
		}

		// Munge this slightly into the format phpunit expects
		// for providers, and filter out HTML constructs which
		// the balancer doesn't support.
		$tests = [];
		// NOTE(review): the wildcard line between <body> and </body> is
		// assumed to be `.*` -- confirm against the upstream file.
		$okre = "~ \A
			(?i:<!DOCTYPE\ html>)?
			<html><head></head><body>
			.*
			</body></html>
			\z ~xs";
		// Literal tags which are unsupported but don't show up in the
		// expected output. Loop-invariant, so it is built once here.
		$literalre = "~ <rdar: | < /? (
			html | head | body | frame | frameset | plaintext
		) > ~xi";

		foreach ( $json as $filename => $cases ) {
			foreach ( $cases as $case ) {
				$html = $case['document']['html'];
				if ( !preg_match( $okre, $html ) ) {
					// Skip tests which involve stuff in the <head> or
					// weird doctypes.
					continue;
				}
				// We used to do this:
				// $html = substr( $html, strlen( $start ), -strlen( $end ) );
				// But now we use a different field in the test case,
				// which reports how domino would parse this case in a
				// no-quirks <body> context. (The original test case may
				// have had a different context, or relied on quirks mode.)
				$html = $case['document']['noQuirksBodyHtml'];
				// Normalize case of SVG element names.
				$html = str_replace( 'foreignObject', 'foreignobject', $html );
				// Normalize case of MathML attributes.
				$html = str_replace( 'definitionURL', 'definitionurl', $html );

				// Per-case metadata consulted by several of the filters below.
				$props = isset( $case['document']['props'] )
					? $case['document']['props']
					: [];

				if (
					isset( $props['comment'] ) &&
					preg_match( ',<!--[^>]*<,', $html )
				) {
					// Skip tests which include HTML comments containing
					// the < character, which we don't support.
					continue;
				}
				if ( strpos( $case['data'], '<![CDATA[' ) !== false ) {
					// Skip tests involving <![CDATA[ ]]> quoting.
					continue;
				}
				if (
					stripos( $case['data'], '<!DOCTYPE' ) !== false &&
					stripos( $case['data'], '<!DOCTYPE html>' ) === false
				) {
					// Skip tests involving unusual doctypes.
					continue;
				}
				if ( preg_match( $literalre, $case['data'] ) ) {
					// Skip tests involving some literal tags, which are
					// unsupported but don't show up in the expected output.
					continue;
				}
				if (
					isset( $props['tags']['iframe'] ) ||
					isset( $props['tags']['noembed'] ) ||
					isset( $props['tags']['noscript'] ) ||
					isset( $props['tags']['script'] ) ||
					isset( $props['tags']['svg script'] ) ||
					isset( $props['tags']['svg title'] ) ||
					isset( $props['tags']['title'] ) ||
					isset( $props['tags']['xmp'] )
				) {
					// Skip tests with unsupported tags which *do* show
					// up in the expected output.
					continue;
				}
				if (
					$filename === 'entities01.dat' ||
					$filename === 'entities02.dat' ||
					preg_match( '/&([a-z]+|#x[0-9A-F]+);/i', $case['data'] ) ||
					preg_match( '/^(&|&#|&#X|&#x|&#45|&x-test|&AMP)$/', $case['data'] )
				) {
					// Skip tests involving entity encoding.
					continue;
				}
				if (
					isset( $props['tagWithLt'] ) ||
					isset( $props['attrWithFunnyChar'] ) ||
					preg_match( ':^(</b test|<di|<foo bar=qux/>)$:', $case['data'] ) ||
					preg_match( ':</p<p>:', $case['data'] ) ||
					preg_match( ':<b &=&amp>|<p/x/y/z>:', $case['data'] )
				) {
					// Skip tests with funny tag or attribute names,
					// which are really tests of the HTML tokenizer, not
					// the tree builder.
					continue;
				}
				if (
					preg_match( ':encoding=" text/html "|type=" hidden":', $case['data'] )
				) {
					// The Sanitizer normalizes whitespace in attribute
					// values, which makes this test case invalid.
					continue;
				}
				if ( $filename === 'plain-text-unsafe.dat' ) {
					// Skip tests with ASCII null, etc.
					continue;
				}
				// The input is balanced as a body fragment, so drop the doctype.
				$data = preg_replace(
					'~<!DOCTYPE html>~i', '', $case['data']
				);
				$tests[] = [
					$filename, // use better description?
					$data,
					$html,
					false // strict HTML5 compat mode, no tidy
				];
			}
		}

		// Some additional tests for mediawiki-specific features
		$tests[] = [
			'Round-trip serialization for <pre>/<listing>/<textarea>',
			"<pre>\n\na</pre><listing>\n\nb</listing><textarea>\n\nc</textarea>",
			"<pre>\n\na</pre><listing>\n\nb</listing><textarea>\n\nc</textarea>",
			true // use the tidy-compatible mode
		];

		return $tests;
	}
}