3 class BalancerTest
extends MediaWikiTestCase
{
7 * Anything that needs to happen before your tests should go here.
9 protected function setUp() {
10 // Be sure to call the parent setup and teardown functions.
11 // This makes sure that all the various cleanup and restorations
12 // happen as they should (including the restoration for setMwGlobals).
14 $this->balancer
= new MediaWiki\Tidy\
Balancer( [
15 'strict' => false, /* not strict */
16 'allowedHtmlElements' => null, /* no sanitization */
17 'tidyCompat' => false, /* standard parser */
18 'allowComments' => true, /* comment parsing */
23 * @covers MediaWiki\Tidy\Balancer::balance
24 * @dataProvider provideBalancerTests
26 public function testBalancer( $description, $input, $expected ) {
27 $output = $this->balancer
->balance( $input );
29 // Ignore self-closing tags
30 $output = preg_replace( '/\s*\/>/', '>', $output );
32 $this->assertEquals( $expected, $output, $description );
35 public static function provideBalancerTests() {
36 // Get the tests from html5lib-tests.json
37 $json = json_decode( file_get_contents(
38 __DIR__
. '/html5lib-tests.json'
40 // Munge this slightly into the format phpunit expects
41 // for providers, and filter out HTML constructs which
42 // the balancer doesn't support.
45 (?i:<!DOCTYPE\ html>)?
46 <html><head></head><body>
50 foreach ( $json as $filename => $cases ) {
51 foreach ( $cases as $case ) {
52 $html = $case['document']['html'];
53 if ( !preg_match( $okre, $html ) ) {
54 // Skip tests which involve stuff in the <head> or
58 // We used to do this:
59 // $html = substr( $html, strlen( $start ), -strlen( $end ) );
60 // But now we use a different field in the test case,
61 // which reports how domino would parse this case in a
62 // no-quirks <body> context. (The original test case may
63 // have had a different context, or relied on quirks mode.)
64 $html = $case['document']['noQuirksBodyHtml'];
65 // Normalize case of SVG element names (foreignObject).
66 $html = str_replace( 'foreignObject', 'foreignobject', $html );
67 // Normalize case of MathML attributes.
68 $html = str_replace( 'definitionURL', 'definitionurl', $html );
71 isset( $case['document']['props']['comment'] ) &&
72 preg_match( ',<!--[^>]*<,', $html )
74 // Skip tests which include HTML comments containing
75 // the < character, which we don't support.
78 if ( strpos( $case['data'], '<![CDATA[' ) !== false ) {
79 // Skip tests involving <![CDATA[ ]]> quoting.
83 stripos( $case['data'], '<!DOCTYPE' ) !== false &&
84 stripos( $case['data'], '<!DOCTYPE html>' ) === false
86 // Skip tests involving unusual doctypes.
89 $literalre = "~ <rdar: | <isindex | < /? (
90 html | head | body | frame | frameset | plaintext
92 if ( preg_match( $literalre, $case['data'] ) ) {
93 // Skip tests involving some literal tags, which are
94 // unsupported but don't show up in the expected output.
98 isset( $case['document']['props']['tags']['iframe'] ) ||
99 isset( $case['document']['props']['tags']['noembed'] ) ||
100 isset( $case['document']['props']['tags']['noscript'] ) ||
101 isset( $case['document']['props']['tags']['script'] ) ||
102 isset( $case['document']['props']['tags']['svg script'] ) ||
103 isset( $case['document']['props']['tags']['svg title'] ) ||
104 isset( $case['document']['props']['tags']['title'] ) ||
105 isset( $case['document']['props']['tags']['xmp'] )
107 // Skip tests with unsupported tags which *do* show
108 // up in the expected output.
112 $filename === 'entities01.dat' ||
113 $filename === 'entities02.dat' ||
114 preg_match( '/&([a-z]+|#x[0-9A-F]+);/i', $case['data'] ) ||
115 preg_match( '/^(&|&#|&#X|&#x|-|&x-test|&)$/', $case['data'] )
117 // Skip tests involving entity encoding.
121 isset( $case['document']['props']['tagWithLt'] ) ||
122 isset( $case['document']['props']['attrWithFunnyChar'] ) ||
123 preg_match( ':^(</b test|<di|<foo bar=qux/>)$:', $case['data'] ) ||
124 preg_match( ':</p<p>:', $case['data'] ) ||
125 preg_match( ':<b &=&>|<p/x/y/z>:', $case['data'] )
127 // Skip tests with funny tag or attribute names,
128 // which are really tests of the HTML tokenizer, not
133 preg_match( ':encoding=" text/html "|type=" hidden":', $case['data'] )
135 // The Sanitizer normalizes whitespace in attribute
136 // values, which makes this test case invalid.
139 if ( $filename === 'plain-text-unsafe.dat' ) {
140 // Skip tests with ASCII null, etc.
143 $data = preg_replace(
144 '~<!DOCTYPE html>~i', '', $case['data']
147 $filename, # use better description?