3 namespace MediaWiki\Tests\Maintenance
;
5 use PHPUnit\Framework\Assert
;
9 * Helper for asserting the structure of an XML dump stream.
14 * Holds the XMLReader used for analyzing an XML dump
18 protected $xml = null;
21 * XML dump schema version
25 protected $schemaVersion;
30 private $varMapping = [];
33 * @param string $schemaVersion see XML_DUMP_SCHEMA_VERSION_XX
35 public function __construct( $schemaVersion ) {
36 $this->schemaVersion
= $schemaVersion;
40 * Step the current XML reader until node start of given name is found.
42 * @param string $name Name of the element to look for
43 * (e.g.: "text" when looking for <text>)
45 * @param bool $allowAscend Whether the search should continue in parent
46 * nodes of the current position. If false (the default), the search will be aborted
47 * on the next closing element.
49 * @return bool True if the node could be found. false otherwise.
51 public function skipToNode( $name, $allowAscend = false ) {
54 $current = $this->xml
->name
;
55 if ( $this->xml
->nodeType
== XMLReader
::ELEMENT
) {
56 if ( $current == $name ) {
60 if ( !$this->xml
->isEmptyElement
) {
65 if ( $this->xml
->nodeType
== XMLReader
::END_ELEMENT
) {
67 if ( $depth < 0 && !$allowAscend ) {
72 if ( !$this->xml
->read() ) {
81 * Step the current XML reader until node start of given name is found,
82 * and advance to the first child node.
84 * @param string $name Name of the element to look for
85 * (e.g.: "text" when looking for <text>)
87 * @param bool $allowAscend Whether the search should continue in parent
88 * nodes of the current position. If false (the default), the search will be aborted
89 * on the next closing element.
91 public function skipIntoNode( $name, $allowAscend = false ) {
92 Assert
::assertTrue( $this->skipToNode( $name, $allowAscend ),
93 "Skipping to $name" );
95 Assert
::assertTrue( !$this->xml
->isEmptyElement
,
96 "Skipping into $name" );
102 * Step the current XML reader until node end of given name is found.
104 * @param string $name Name of the closing element to look for
105 * (e.g.: "mediawiki" when looking for </mediawiki>)
107 * @return bool True if the end node could be found. false otherwise.
109 public function skipToNodeEnd( $name ) {
110 while ( $this->xml
->read() ) {
111 if ( $this->xml
->nodeType
== XMLReader
::END_ELEMENT
&&
112 $this->xml
->name
== $name
122 * Step the current XML reader to the first element start after the node
123 * end of a given name.
125 * @param string $name Name of the closing element to look for
126 * (e.g.: "mediawiki" when looking for </mediawiki>)
128 * @return bool True if new element after the closing of $name could be
129 * found. false otherwise.
131 public function skipPastNodeEnd( $name ) {
132 Assert
::assertTrue( $this->skipToNodeEnd( $name ),
133 "Skipping to end of $name" );
134 while ( $this->xml
->read() ) {
135 if ( $this->xml
->nodeType
== XMLReader
::ELEMENT
) {
144 * Opens an XML file to analyze.
146 * @param string $fname Name of file to analyze
148 public function open( $fname ) {
149 $this->xml
= new XMLReader();
151 Assert
::assertTrue( $this->xml
->open( $fname ),
152 "Opening temporary file $fname via XMLReader failed" );
156 * Opens an XML file to analyze, verifies the top level tags,
157 * and skips past <siteinfo>.
159 * The contents of the <siteinfo> tag can be checked if $siteInfoTemplate
160 * is given. See assertDumpHead().
162 * @param string $fname Name of file to analyze
164 * @param string|null $siteInfoTemplate
165 * @param string $language
167 public function assertDumpStart( $fname, $siteInfoTemplate = null, $language = 'en' ) {
168 $this->open( $fname );
169 $this->assertDumpHead( $siteInfoTemplate, $language );
173 * Asserts that the head of a dump is valid.
174 * This checks the attributes of the top level <mediawiki> tag.
176 * If $siteInfoTemplate is given, it is interpreted as the file name
177 * of an XML template that will be used with assertDOM() to check the contents
178 * of the <siteinfo> tag, which is expected to be the first child of
179 * the top level <mediawiki>. Variable substitution applies as defined by
180 * calling setVarMapping().
182 * After this method returns, the XML reader's position will be after
183 * the closing </siteinfo> tag, before the next tag.
185 * @param string|null $siteInfoTemplate
186 * @param string $language
188 public function assertDumpHead( $siteInfoTemplate = null, $language = 'en' ) {
189 $this->assertNodeStart( 'mediawiki', false );
190 $this->assertAttributes( [
191 "xmlns" => "http://www.mediawiki.org/xml/export-{$this->schemaVersion}/",
192 "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
193 "xsi:schemaLocation" => "http://www.mediawiki.org/xml/export-{$this->schemaVersion}/ "
194 . "http://www.mediawiki.org/xml/export-{$this->schemaVersion}.xsd",
195 "version" => "{$this->schemaVersion}",
196 "xml:lang" => "{$language}"
199 $this->assertNodeStart( 'siteinfo', false );
201 if ( $siteInfoTemplate ) {
202 // Checking site info
203 $this->assertDOM( $siteInfoTemplate );
206 // skip past extra namespaces
207 $this->skipPastNodeEnd( 'siteinfo' );
211 * Asserts that the xml reader is at the final closing tag of an xml file and
214 * @param string $name (optional) the name of the final tag
215 * (e.g.: "mediawiki" for </mediawiki>)
217 public function assertDumpEnd( $name = "mediawiki" ) {
218 $this->assertNodeEnd( $name, false );
219 if ( $this->xml
->read() ) {
220 $this->skipWhitespace();
222 Assert
::assertEquals( $this->xml
->nodeType
, XMLReader
::NONE
,
223 "No proper entity left to parse" );
227 public function close() {
232 * Steps the xml reader over white space
234 public function skipWhitespace() {
236 while ( $cont && ( ( $this->xml
->nodeType
== XMLReader
::NONE
)
237 ||
( $this->xml
->nodeType
== XMLReader
::WHITESPACE
)
238 ||
( $this->xml
->nodeType
== XMLReader
::SIGNIFICANT_WHITESPACE
) ) ) {
239 $cont = $this->xml
->read();
244 * Asserts that the xml reader is at an element of given name, and optionally
245 * skips past it. If the reader is at a whitespace element, the whitespace is
248 * @param string $name The name of the element to check for
249 * (e.g.: "mediawiki" for <mediawiki>)
250 * @param bool $skip (optional) if true, skip past the found element
252 public function assertNodeStart( $name, $skip = true ) {
253 $this->skipWhitespace();
254 Assert
::assertEquals( $name, $this->xml
->name
, "Node name" );
255 Assert
::assertEquals( XMLReader
::ELEMENT
, $this->xml
->nodeType
, "Node type" );
257 Assert
::assertTrue( $this->xml
->read(), "Skipping past start tag" );
262 * Asserts that the XML reader is at an element start, and that the element
263 * has the given attributes with the given values.
264 * Variable substitution applies for variables set via setVarMapping().
266 * @param array $attributes
267 * @param bool $skip (optional) if true, skip past the found element
269 public function assertAttributes( $attributes, $skip = true ) {
270 Assert
::assertEquals( XMLReader
::ELEMENT
, $this->xml
->nodeType
, "Node type" );
271 $actualAttributes = $this->getAttributeArray( $this->xml
);
273 $attributes = array_map(
275 return $this->resolveVars( $v );
279 $actualAttributes = array_intersect_key( $actualAttributes, $attributes );
281 Assert
::assertEquals( $attributes, $actualAttributes, "Attributes" );
284 Assert
::assertTrue( $this->xml
->read(), "Skipping past start tag" );
289 * Asserts that the xml reader is at an element of given name, and that element
292 * @param string $name The name of the element to check for
293 * (e.g.: "text" for <text/>)
294 * @param bool $skip (optional) if true, skip past the found element
295 * @param bool $skip_ws (optional) if true, also skip past white spaces that trail the
298 public function assertEmptyNode( $name, $skip = true, $skip_ws = true ) {
299 $this->assertNodeStart( $name, false );
300 Assert
::assertFalse( !$this->xml
->isEmptyElement
, "$name tag has content" );
303 Assert
::assertTrue( $this->xml
->read(), "Skipping $name tag" );
304 if ( ( $this->xml
->nodeType
== XMLReader
::END_ELEMENT
)
305 && ( $this->xml
->name
== $name )
311 $this->skipWhitespace();
317 * Asserts that the xml reader is at a closing element of given name, and optionally
318 * skips past it. If the reader is at a whitespace element, the whitespace is
321 * @param string $name The name of the closing element to check for
322 * (e.g.: "mediawiki" for </mediawiki>)
323 * @param bool $skip (optional) if true, skip past the found element
325 public function assertNodeEnd( $name, $skip = true ) {
326 $this->skipWhitespace();
327 Assert
::assertEquals( $name, $this->xml
->name
, "Node name" );
328 Assert
::assertEquals( XMLReader
::END_ELEMENT
, $this->xml
->nodeType
, "Node type" );
330 // note: if there is no more content after the tag and read() returns false,
337 * Asserts that the xml reader is at an element of given tag that contains a given text,
338 * and skips over the element.
340 * @param string $name The name of the element to check for
341 * (e.g.: "mediawiki" for <mediawiki>...</mediawiki>)
342 * @param string|bool $text If string, check if it equals the elements text.
343 * Variable substitution applies. If false, ignore the element's text.
344 * @param bool $skip_ws (optional) if true, skip past white spaces that trail the
347 public function assertTextNode( $name, $text, $skip_ws = true ) {
348 $this->assertNodeStart( $name );
350 if ( $text !== false ) {
351 $text = $this->resolveVars( $text );
352 $actual = $this->resolveVars( $this->xml
->value
);
353 Assert
::assertEquals( $text, $actual, "Text of node " . $name );
355 Assert
::assertTrue( $this->xml
->read(), "Skipping past processed text of " . $name );
356 $this->assertNodeEnd( $name );
359 $this->skipWhitespace();
364 * Asserts that the xml reader is at the start of a page element and skips over the first
365 * tags, after checking them.
367 * Besides the opening page element, this function also checks for and skips over the
368 * title, ns, and id tags. Hence after this function, the xml reader is at the first
369 * revision of the current page.
371 * @param int $id Id of the page to assert
372 * @param int $ns Number of namespace to assert
373 * @param string $name Title of the current page
375 public function assertPageStart( $id, $ns, $name ) {
376 $this->assertNodeStart( "page" );
377 $this->assertTextNode( "title", $name );
378 $this->assertTextNode( "ns", $ns );
379 $this->assertTextNode( "id", $id );
383 * Asserts that the xml reader is at the page's closing element and skips to the next
386 public function assertPageEnd() {
387 $this->assertNodeEnd( "page" );
391 * Checks and skips tags that represent the properties of a revision.
393 * @param int $id Id of the revision
394 * @param string $summary Summary of the revision
395 * @param string $text_sha1 The base36 SHA-1 of the revision's text
396 * @param string $hasEarlyText Whether a text tag is expected before the <sha1> tag.
397 * Must be one of 'yes', 'no', or 'maybe'.
398 * @param int|bool $parentid (optional) id of the parent revision
399 * @param string $model The expected content model id (default: CONTENT_MODEL_WIKITEXT)
400 * @param string $format The expected content format id (default: CONTENT_FORMAT_WIKITEXT)
401 * @param bool &$foundText Output, whether a text tag was found before the SHA1 tag.
402 * If this returns false, the text tag should be the next tag after the method returns.
404 public function assertRevisionProperties( $id, $summary,
405 $text_sha1, $hasEarlyText = 'maybe', $parentid = false,
406 $model = CONTENT_MODEL_WIKITEXT
, $format = CONTENT_FORMAT_WIKITEXT
,
409 $this->assertTextNode( "id", $id );
410 if ( $parentid !== false ) {
411 $this->assertTextNode( "parentid", $parentid );
413 $this->assertTextNode( "timestamp", false );
415 $this->assertNodeStart( "contributor" );
416 $this->assertTextNode( "username", false );
417 $this->assertTextNode( "id", false );
418 $this->assertNodeEnd( "contributor" );
420 $this->assertTextNode( "comment", $summary );
422 if ( $this->schemaVersion
>= XML_DUMP_SCHEMA_VERSION_11
) {
423 $this->assertTextNode( "origin", false );
426 $this->assertTextNode( "model", $model );
428 $this->assertTextNode( "format", $format );
430 if ( $hasEarlyText === 'yes' ||
( $this->xml
->name
== "text" && $hasEarlyText === 'maybe' ) ) {
432 $this->assertNodeStart( "text", false );
434 $this->skipWhitespace();
440 $this->assertTextNode( "sha1", $text_sha1 );
442 $this->assertEmptyNode( "sha1" );
447 * Asserts that the xml reader is at a revision and checks its representation before
450 * @param int $id Id of the revision
451 * @param string $summary Summary of the revision
452 * @param int $text_id Id of the revision's text
453 * @param int $text_bytes Number of bytes in the revision's text
454 * @param string $text_sha1 The base36 SHA-1 of the revision's text
455 * @param string|bool $text (optional) The revision's string, or false to check for a
457 * @param int|bool $parentid (optional) id of the parent revision
458 * @param string $model The expected content model id (default: CONTENT_MODEL_WIKITEXT)
459 * @param string $format The expected content format id (default: CONTENT_FORMAT_WIKITEXT)
461 public function assertRevision( $id, $summary, $text_id, $text_bytes,
462 $text_sha1, $text = false, $parentid = false,
463 $model = CONTENT_MODEL_WIKITEXT
, $format = CONTENT_FORMAT_WIKITEXT
465 $this->assertNodeStart( "revision" );
467 $this->assertRevisionProperties(
478 if ( !$text_found ) {
479 $this->assertText( $id, $text_id, $text_bytes, $text );
482 $this->assertNodeEnd( "revision" );
483 $this->skipWhitespace();
486 public function assertText( $id, $text_id, $text_bytes, $text ) {
487 $this->assertNodeStart( "text", false );
488 if ( $text_bytes !== false ) {
489 Assert
::assertEquals( $this->xml
->getAttribute( "bytes" ), $text_bytes,
490 "Attribute 'bytes' of revision " . $id );
493 if ( $text === false ) {
494 Assert
::assertEquals( $this->xml
->getAttribute( "id" ), $text_id,
495 "Text id of revision " . $id );
496 Assert
::assertNull( $this->xml
->getAttribute( "xml:space" ),
497 "xml:space attribute shout not be present" );
498 $this->assertEmptyNode( "text" );
500 // Testing for a real dump
501 Assert
::assertEquals( $this->xml
->getAttribute( "xml:space" ), "preserve",
502 "xml:space=preserve should be present" );
503 Assert
::assertTrue( $this->xml
->read(), "Skipping text start tag" );
504 Assert
::assertEquals( $text, $this->xml
->value
, "Text of revision " . $id );
505 Assert
::assertTrue( $this->xml
->read(), "Skipping past text" );
506 $this->assertNodeEnd( "text" );
507 $this->skipWhitespace();
512 * Asserts that the xml reader is at the beginning of a log entry and skips over
513 * it while analyzing it.
515 * @param int $id Id of the log entry
516 * @param string $user_name User name of the log entry's performer
517 * @param int $user_id User id of the log entry's performer
518 * @param string|null $comment Comment of the log entry. If null, the comment text is ignored.
519 * @param string $type Type of the log entry
520 * @param string $subtype Subtype of the log entry
521 * @param string $title Title of the log entry's target
522 * @param array $parameters (optional) unserialized data accompanying the log entry
524 public function assertLogItem( $id, $user_name, $user_id, $comment, $type,
525 $subtype, $title, $parameters = []
527 $this->assertNodeStart( "logitem" );
529 $this->assertTextNode( "id", $id );
530 $this->assertTextNode( "timestamp", false );
532 $this->assertNodeStart( "contributor" );
533 $this->assertTextNode( "username", $user_name );
534 $this->assertTextNode( "id", $user_id );
535 $this->assertNodeEnd( "contributor" );
537 if ( $comment !== null ) {
538 $this->assertTextNode( "comment", $comment );
540 $this->assertTextNode( "type", $type );
541 $this->assertTextNode( "action", $subtype );
542 $this->assertTextNode( "logtitle", $title );
544 $this->assertNodeStart( "params" );
545 $parameters_xml = unserialize( $this->xml
->value
);
546 Assert
::assertEquals( $parameters, $parameters_xml );
547 Assert
::assertTrue( $this->xml
->read(), "Skipping past processed text of params" );
548 $this->assertNodeEnd( "params" );
550 $this->assertNodeEnd( "logitem" );
554 * Returns the XMLReader's current line number for reporting.
556 * @param XMLReader|null $xml
560 public function getLineNumber( ?XMLReader
$xml = null ) {
563 if ( $xml->nodeType
== XMLReader
::NONE
) {
567 return $xml->expand()->getLineNo();
571 * Opens an XML template file and compares it to the XML structure at the current position of
574 * If the outer-most tag of the template file is <test:data>, that tag is
575 * ignored during comparison. This allows template files to contain arbitrary snippets of XML.
576 * When the tag <test:end/> is encountered in the template, the comparison is ended.
577 * This allows template files to be written to match the beginning of a structure,
578 * without the need for subsequent contents to match.
580 * The contents of $file are subject to variable substitution based on
581 * the values provided via setVarMapping().
583 * @param string $file Name of file to analyze
585 public function assertDOM( $file ) {
586 $exXml = new XMLReader();
588 Assert
::assertTrue( $exXml->open( $file ),
589 "Opening fixture file $file via XMLReader failed" );
593 $line = max( $line, $this->getLineNumber( $exXml ) );
594 $location = "[$file line $line] ";
596 while ( $exXml->nodeType
== XMLReader
::NONE
597 ||
$exXml->nodeType
== XMLReader
::WHITESPACE
598 ||
$exXml->nodeType
== XMLReader
::SIGNIFICANT_WHITESPACE
599 ||
$exXml->nodeType
== XMLReader
::COMMENT
600 ||
( $exXml->nodeType
== XMLReader
::ELEMENT
&& $exXml->name
=== 'test:data' ) ) {
602 // Reached the end of the template file, so we are done here.
603 if ( !$exXml->read() ) {
607 // Reached the end of the test data, so we are done here.
608 if ( $exXml->nodeType
== XMLReader
::END_ELEMENT
&& $exXml->name
=== 'test:data' ) {
613 while ( $this->xml
->nodeType
== XMLReader
::NONE
614 ||
$this->xml
->nodeType
== XMLReader
::WHITESPACE
615 ||
$this->xml
->nodeType
== XMLReader
::SIGNIFICANT_WHITESPACE
616 ||
$this->xml
->nodeType
== XMLReader
::COMMENT
) {
617 Assert
::assertTrue( $this->xml
->read(), $location . 'Document ended unexpectedly' );
620 // End comparison early, ignore the rest of the contents of the template file.
621 if ( $exXml->nodeType
== XMLReader
::ELEMENT
&& $exXml->name
=== 'test:end' ) {
625 $line = max( $line, $this->getLineNumber( $exXml ) );
626 $location = "[$file line $line] ";
628 Assert
::assertSame( $exXml->nodeType
, $this->xml
->nodeType
, $location . 'Node type' );
629 Assert
::assertSame( $exXml->name
, $this->xml
->name
, $location . 'Node type' );
632 $this->xml
->hasValue
,
633 $location . 'Node has value?'
636 $exXml->hasAttributes
,
637 $this->xml
->hasAttributes
,
638 $location . 'Node has attributes?'
641 if ( $exXml->hasValue
) {
642 $expValue = $this->resolveVars( $exXml->value
);
643 $actValue = $this->resolveVars( $this->xml
->value
);
644 Assert
::assertSame( $expValue, $actValue, $location . 'Node value' );
647 if ( $exXml->hasAttributes
) {
648 $expectedAttributes = $this->getAttributeArray( $exXml );
649 $actualAttributes = $this->getAttributeArray( $this->xml
);
651 Assert
::assertEquals( $expectedAttributes, $actualAttributes, $location . 'Attributes' );
654 // Reached the end of the template file, so we are done here.
655 if ( !$exXml->read() ) {
659 // Reached the end of the test data, so we are done here.
660 if ( $exXml->nodeType
== XMLReader
::END_ELEMENT
&& $exXml->name
=== 'test:data' ) {
664 Assert
::assertTrue( $this->xml
->read(), $location . 'Document ended unexpectedly' );
671 * Strip any <test:...> tags from a string.
673 * @param string $text
677 public function stripTestTags( $text ) {
678 $text = preg_replace( '@<!--.*?-->@s', '', $text );
679 $text = preg_replace( '@</?test:[^>]+>@', '', $text );
683 private function getAttributeArray( ?XMLReader
$xml = null ) {
688 if ( $xml->nodeType
!== XMLReader
::ELEMENT
) {
692 if ( !$xml->hasAttributes
) {
697 while ( $xml->moveToNextAttribute() ) {
698 $attr[$xml->name
] = $this->resolveVars( $xml->value
);
705 * @param string $text
709 public function resolveVars( $text ) {
711 array_keys( $this->varMapping
),
712 array_values( $this->varMapping
),
718 * Define a variable mapping to be applied by resolveVars(), and thereby by assertDOM(), assertAttributes() and assertTextNode()
720 * @param string $name
721 * @param string $value
723 public function setVarMapping( $name, $value ) {
724 $key = '{{' . $name . '}}';
725 $this->varMapping
[$key] = $value;
731 public function getSchemaVersion() {
732 return $this->schemaVersion
;