/**
 * Normalizes parser output so that results which differ only in
 * insignificant markup details compare as equal.
 *
 * The input fragment is wrapped in <html><body>...</body></html>, parsed as
 * XML, transformed in place by the requested normalization methods, and then
 * serialized back to a string.
 */
class ParserTestResultNormalizer {
	/** @var DOMDocument Document holding the wrapped input */
	protected $doc;

	/** @var DOMXPath XPath evaluator bound to $doc */
	protected $xpath;

	/** @var bool True when the input could not be parsed as XML */
	protected $invalid = false;

	/** @var DOMNode|null The <body> element of $doc, or null if there is none */
	protected $body;

	/**
	 * Apply the named normalization methods to a fragment and serialize it.
	 *
	 * @param string $text Markup fragment to normalize
	 * @param string[] $funcs Names of normalization methods of this class
	 *   (for example 'removeTbody' or 'trimWhitespace'), applied in order
	 * @return string The normalized, XML-serialized fragment
	 */
	public static function normalize( $text, $funcs ) {
		$norm = new self( $text );
		if ( $norm->invalid ) {
			// NOTE(review): the statement of this branch was missing from the
			// reviewed copy. Returning the input unchanged is assumed -- confirm.
			return $text;
		}
		foreach ( $funcs as $func ) {
			// NOTE(review): the loop body was missing from the reviewed copy.
			// Calling the named method on the normalizer is assumed -- confirm.
			$norm->$func();
		}
		return $norm->serialize();
	}

	/**
	 * Parse the fragment and set up the XPath helper.
	 *
	 * Sets $this->invalid when the wrapped fragment is not well-formed XML.
	 *
	 * @param string $text Markup fragment to parse
	 */
	protected function __construct( $text ) {
		$this->doc = new DOMDocument( '1.0', 'utf-8' );

		// Note: parsing a supposedly XHTML document with an XML parser is not
		// guaranteed to give accurate results. For example, it may introduce
		// differences in the number of line breaks in <pre> tags.

		// Parse warnings are silenced; failure is reported through $invalid.
		MediaWiki\suppressWarnings();
		if ( !$this->doc->loadXML( '<html><body>' . $text . '</body></html>' ) ) {
			$this->invalid = true;
		}
		MediaWiki\restoreWarnings();

		$this->xpath = new DOMXPath( $this->doc );
		$this->body = $this->xpath->query( '//body' )->item( 0 );
	}

	/**
	 * Remove every <tbody> element, keeping its children in its place.
	 */
	protected function removeTbody() {
		foreach ( $this->xpath->query( '//tbody' ) as $tbody ) {
			// Hoist each child in front of the <tbody>, preserving order.
			while ( $tbody->firstChild ) {
				$child = $tbody->firstChild;
				$tbody->removeChild( $child );
				$tbody->parentNode->insertBefore( $child, $tbody );
			}
			// The <tbody> is now empty and can be dropped.
			$tbody->parentNode->removeChild( $tbody );
		}
	}

	/**
	 * The point of this function is to produce a normalized DOM in which
	 * Tidy's output matches the output of html5depurate. Tidy both trims
	 * and pretty-prints, so this requires fairly aggressive treatment.
	 *
	 * In particular, note that Tidy converts <pre>x</pre> to <pre>\nx\n</pre>,
	 * which theoretically affects display since the second line break is not
	 * ignored by compliant HTML parsers.
	 *
	 * Text nodes directly inside <pre> lose at most one leading and one
	 * trailing line break; all other text nodes are fully trimmed. Text nodes
	 * that end up empty are removed from the document.
	 */
	protected function trimWhitespace() {
		foreach ( $this->xpath->query( '//text()' ) as $child ) {
			$data = $child->data;
			if ( strtolower( $child->parentNode->nodeName ) === 'pre' ) {
				// Just trim one line break from the start and end.
				// Only the first/last byte is inspected: substr_compare()
				// without a length would compare the whole string and so
				// would only ever match text that is exactly "\n".
				if ( substr( $data, 0, 1 ) === "\n" ) {
					$data = (string)substr( $data, 1 );
				}
				if ( substr( $data, -1 ) === "\n" ) {
					$data = (string)substr( $data, 0, -1 );
				}
			} else {
				// Trim all whitespace
				$data = trim( $data );
			}
			$child->data = $data;
			if ( $data === '' ) {
				$child->parentNode->removeChild( $child );
			}
		}
	}

	/**
	 * Serialize the XML DOM for comparison purposes. This does not generate HTML.
	 *
	 * @return string XML serialization of the <body> element with the
	 *   '<body>' and '</body>' tags stripped
	 */
	protected function serialize() {
		return strtr(
			$this->doc->saveXML( $this->body ),
			[ '<body>' => '', '</body>' => '' ]
		);
	}
}