Localisation updates from https://translatewiki.net.
[mediawiki.git] / tests / phpunit / maintenance / DumpAsserter.php
blob24a18b8f8dd995e1430f4a649d679ab6d77812ab
1 <?php
3 namespace MediaWiki\Tests\Maintenance;
5 use PHPUnit\Framework\Assert;
6 use XMLReader;
8 /**
9 * Helper for asserting the structure of an XML dump stream.
11 class DumpAsserter {
/**
 * Reader positioned inside the XML dump under inspection.
 * Stays null until open() has been called.
 *
 * @var XMLReader|null
 */
protected $xml;

/**
 * Version of the XML dump schema the dump is expected to follow.
 *
 * @var string
 */
protected $schemaVersion;

/**
 * Placeholder substitutions, keyed by the literal '{{name}}' token
 * and mapping to the replacement value (see setVarMapping()).
 *
 * @var array
 */
private $varMapping = [];
/**
 * Creates an asserter bound to a single XML dump schema version.
 *
 * @param string $schemaVersion see XML_DUMP_SCHEMA_VERSION_XX
 */
public function __construct( $schemaVersion ) {
	$this->schemaVersion = $schemaVersion;
}
/**
 * Advances the reader until it sits on the opening tag of an element
 * with the given name. The node the reader is currently on is examined
 * first, before anything is read.
 *
 * @param string $name Name of the element to look for
 *   (e.g.: "text" when looking for <text>)
 * @param bool $allowAscend Whether the search may continue in parent
 *   nodes of the starting position. If false (the default), the search stops
 *   as soon as a closing tag would take the reader above its starting level.
 *
 * @return bool True if the element was found, false otherwise.
 */
public function skipToNode( $name, $allowAscend = false ) {
	// Nesting level relative to where the search began.
	$level = 0;

	do {
		$type = $this->xml->nodeType;

		if ( $type == XMLReader::ELEMENT ) {
			if ( $this->xml->name == $name ) {
				return true;
			}
			// Self-closing elements never emit an end tag, so they
			// must not deepen the nesting level.
			if ( !$this->xml->isEmptyElement ) {
				$level++;
			}
		} elseif ( $type == XMLReader::END_ELEMENT ) {
			$level--;
			if ( $level < 0 && !$allowAscend ) {
				return false;
			}
		}
	} while ( $this->xml->read() );

	return false;
}
/**
 * Advances the reader to the opening tag of the named element and then
 * one step further, onto its first child node. Fails the test if the
 * element cannot be found or if it is an empty (self-closing) element.
 *
 * @param string $name Name of the element to look for
 *   (e.g.: "text" when looking for <text>)
 * @param bool $allowAscend Whether the search may continue in parent
 *   nodes of the starting position. If false (the default), the search stops
 *   on the next closing element.
 */
public function skipIntoNode( $name, $allowAscend = false ) {
	$found = $this->skipToNode( $name, $allowAscend );
	Assert::assertTrue( $found, "Skipping to $name" );

	$hasChildren = !$this->xml->isEmptyElement;
	Assert::assertTrue( $hasChildren, "Skipping into $name" );

	$this->xml->read();
}
/**
 * Advances the reader until it sits on the closing tag of the given name.
 * The node the reader is on when called is not examined; reading starts
 * with the next node.
 *
 * @param string $name Name of the closing element to look for
 *   (e.g.: "mediawiki" when looking for </mediawiki>)
 *
 * @return bool True if the closing tag was found, false if the document
 *   ended first.
 */
public function skipToNodeEnd( $name ) {
	do {
		if ( !$this->xml->read() ) {
			return false;
		}
		$atWantedEnd = $this->xml->nodeType == XMLReader::END_ELEMENT
			&& $this->xml->name == $name;
	} while ( !$atWantedEnd );

	return true;
}
/**
 * Advances the reader past the closing tag of the given name and on to
 * the next opening tag that follows it. Fails the test if the closing
 * tag cannot be found.
 *
 * @param string $name Name of the closing element to look for
 *   (e.g.: "mediawiki" when looking for </mediawiki>)
 *
 * @return bool True if an element start was found after the closing tag
 *   of $name, false if the document ended first.
 */
public function skipPastNodeEnd( $name ) {
	$reachedEnd = $this->skipToNodeEnd( $name );
	Assert::assertTrue( $reachedEnd, "Skipping to end of $name" );

	$atElement = false;
	while ( !$atElement && $this->xml->read() ) {
		$atElement = ( $this->xml->nodeType == XMLReader::ELEMENT );
	}

	return $atElement;
}
/**
 * Attaches a fresh XMLReader to the given file and makes it the reader
 * used by all subsequent assertions. Fails the test if the file cannot
 * be opened.
 *
 * @param string $fname Name of file to analyze
 */
public function open( $fname ) {
	$reader = new XMLReader();
	$this->xml = $reader;

	$opened = $reader->open( $fname );
	Assert::assertTrue( $opened, "Opening temporary file $fname via XMLReader failed" );
}
/**
 * Convenience entry point: opens the dump file and immediately checks
 * its head via assertDumpHead(), which verifies the top level tag and
 * moves the reader beyond <siteinfo>.
 *
 * If $siteInfoTemplate is given, the contents of <siteinfo> are checked
 * against it. See assertDumpHead().
 *
 * @param string $fname Name of file to analyze
 * @param string|null $siteInfoTemplate
 * @param string $language
 */
public function assertDumpStart( $fname, $siteInfoTemplate = null, $language = 'en' ) {
	$this->open( $fname );

	$this->assertDumpHead( $siteInfoTemplate, $language );
}
/**
 * Asserts that the head of a dump is valid.
 * The namespace, schema location, version and language attributes of the
 * top level <mediawiki> tag are checked, and <siteinfo> must be the next
 * element.
 *
 * If $siteInfoTemplate is given, it is interpreted as the file name
 * of an XML template that is passed to assertDOM() to check the contents
 * of the <siteinfo> tag. Variable substitution applies as defined by
 * calling setVarMapping().
 *
 * Finally the reader is moved with skipPastNodeEnd() beyond the closing
 * </siteinfo> tag.
 *
 * @param string|null $siteInfoTemplate
 * @param string $language
 */
public function assertDumpHead( $siteInfoTemplate = null, $language = 'en' ) {
	// Namespace URI without the trailing slash; reused for the .xsd location.
	$exportBase = "http://www.mediawiki.org/xml/export-{$this->schemaVersion}";

	$expectedAttributes = [
		"xmlns" => $exportBase . "/",
		"xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
		"xsi:schemaLocation" => $exportBase . "/ " . $exportBase . ".xsd",
		"version" => (string)$this->schemaVersion,
		"xml:lang" => (string)$language
	];

	$this->assertNodeStart( 'mediawiki', false );
	$this->assertAttributes( $expectedAttributes );

	$this->assertNodeStart( 'siteinfo', false );

	// Only compare the site info when a template was supplied.
	if ( $siteInfoTemplate ) {
		$this->assertDOM( $siteInfoTemplate );
	}

	// Whatever else <siteinfo> contains (e.g. extra namespaces) is skipped.
	$this->skipPastNodeEnd( 'siteinfo' );
}
/**
 * Asserts that the xml reader is at the final closing tag of an xml file and
 * closes the reader.
 *
 * Leading whitespace is skipped before the closing tag is checked. After
 * the closing tag only whitespace may follow; anything else fails the test.
 *
 * @param string $name (optional) the name of the final tag
 *   (e.g.: "mediawiki" for </mediawiki>)
 */
public function assertDumpEnd( $name = "mediawiki" ) {
	$this->assertNodeEnd( $name, false );

	// Step over the closing tag; if anything is left, it may only be whitespace.
	if ( $this->xml->read() ) {
		$this->skipWhitespace();
	}

	// Expected value goes first so that PHPUnit labels a failure correctly.
	Assert::assertEquals( XMLReader::NONE, $this->xml->nodeType,
		"No proper entity left to parse" );
	$this->close();
}
/**
 * Closes the underlying XMLReader.
 */
public function close() {
	$this->xml->close();
}
/**
 * Moves the reader forward while it sits on whitespace (or has not
 * started reading yet), stopping at the first other node or at the end
 * of the document.
 */
public function skipWhitespace() {
	$skippable = [
		XMLReader::NONE,
		XMLReader::WHITESPACE,
		XMLReader::SIGNIFICANT_WHITESPACE,
	];

	while ( in_array( $this->xml->nodeType, $skippable ) ) {
		if ( !$this->xml->read() ) {
			// Nothing left to read.
			break;
		}
	}
}
/**
 * Asserts that the reader is on the opening tag of the named element,
 * after first skipping any whitespace, and optionally steps past it.
 *
 * @param string $name The name of the element to check for
 *   (e.g.: "mediawiki" for <mediawiki>)
 * @param bool $skip (optional) if true, skip past the found element
 */
public function assertNodeStart( $name, $skip = true ) {
	$this->skipWhitespace();

	$reader = $this->xml;
	Assert::assertEquals( $name, $reader->name, "Node name" );
	Assert::assertEquals( XMLReader::ELEMENT, $reader->nodeType, "Node type" );

	if ( !$skip ) {
		return;
	}

	Assert::assertTrue( $reader->read(), "Skipping past start tag" );
}
/**
 * Asserts that the reader is on an element start and that the element
 * carries the given attributes with the given values. Attributes present
 * on the element but not listed in $attributes are ignored.
 * Variable substitution applies for variables set via setVarMapping().
 *
 * @param array $attributes Expected values keyed by attribute name
 * @param bool $skip (optional) if true, skip past the found element
 */
public function assertAttributes( $attributes, $skip = true ) {
	Assert::assertEquals( XMLReader::ELEMENT, $this->xml->nodeType, "Node type" );
	$found = $this->getAttributeArray( $this->xml );

	$expected = [];
	foreach ( $attributes as $attrName => $attrValue ) {
		$expected[$attrName] = $this->resolveVars( $attrValue );
	}

	// Restrict the comparison to the attributes the caller asked about.
	$found = array_intersect_key( $found, $expected );

	Assert::assertEquals( $expected, $found, "Attributes" );

	if ( !$skip ) {
		return;
	}

	Assert::assertTrue( $this->xml->read(), "Skipping past start tag" );
}
/**
 * Asserts that the reader is on an element of the given name and that
 * the element is an empty tag.
 *
 * @param string $name The name of the element to check for
 *   (e.g.: "text" for <text/>)
 * @param bool $skip (optional) if true, skip past the found element
 * @param bool $skip_ws (optional) if true, also skip past white spaces that trail the
 *   closing element. Only has an effect when $skip is true.
 */
public function assertEmptyNode( $name, $skip = true, $skip_ws = true ) {
	$this->assertNodeStart( $name, false );

	$hasContent = !$this->xml->isEmptyElement;
	Assert::assertFalse( $hasContent, "$name tag has content" );

	if ( !$skip ) {
		return;
	}

	Assert::assertTrue( $this->xml->read(), "Skipping $name tag" );

	// If a closing tag of the same name comes next, step over it as well.
	$atMatchingEnd = ( $this->xml->nodeType == XMLReader::END_ELEMENT )
		&& ( $this->xml->name == $name );
	if ( $atMatchingEnd ) {
		$this->xml->read();
	}

	if ( $skip_ws ) {
		$this->skipWhitespace();
	}
}
/**
 * Asserts that the reader is on the closing tag of the named element,
 * after first skipping any whitespace, and optionally steps past it.
 *
 * @param string $name The name of the closing element to check for
 *   (e.g.: "mediawiki" for </mediawiki>)
 * @param bool $skip (optional) if true, skip past the found element
 */
public function assertNodeEnd( $name, $skip = true ) {
	$this->skipWhitespace();

	$reader = $this->xml;
	Assert::assertEquals( $name, $reader->name, "Node name" );
	Assert::assertEquals( XMLReader::END_ELEMENT, $reader->nodeType, "Node type" );

	if ( !$skip ) {
		return;
	}

	// The result of read() is deliberately not asserted: the closing tag
	// may be the very last node of the document.
	$reader->read();
}
/**
 * Asserts that the reader is on an element of the given name, optionally
 * checks the text it contains, and steps over the whole element
 * (start tag, text and end tag).
 *
 * @param string $name The name of the element to check for
 *   (e.g.: "mediawiki" for <mediawiki>...</mediawiki>)
 * @param string|bool $text If string, check if it equals the elements text.
 *   Variable substitution applies to both the expected and the actual text.
 *   If false, ignore the element's text.
 * @param bool $skip_ws (optional) if true, skip past white spaces that trail the
 *   closing element.
 */
public function assertTextNode( $name, $text, $skip_ws = true ) {
	$this->assertNodeStart( $name );

	$checkText = ( $text !== false );
	if ( $checkText ) {
		$expected = $this->resolveVars( $text );
		$found = $this->resolveVars( $this->xml->value );
		Assert::assertEquals( $expected, $found, "Text of node $name" );
	}

	Assert::assertTrue( $this->xml->read(), "Skipping past processed text of $name" );
	$this->assertNodeEnd( $name );

	if ( !$skip_ws ) {
		return;
	}
	$this->skipWhitespace();
}
/**
 * Asserts that the reader is at the start of a page element and checks
 * and steps over its leading tags.
 *
 * After the opening <page> tag, the <title>, <ns> and <id> tags are
 * verified in that order and skipped, so that the reader ends up on
 * whatever follows the page's <id> tag.
 *
 * @param int $id Id of the page to assert
 * @param int $ns Number of namespace to assert
 * @param string $name Title of the current page
 */
public function assertPageStart( $id, $ns, $name ) {
	$this->assertNodeStart( "page" );

	$expectedByTag = [
		"title" => $name,
		"ns" => $ns,
		"id" => $id,
	];
	foreach ( $expectedByTag as $tag => $expected ) {
		$this->assertTextNode( $tag, $expected );
	}
}
/**
 * Asserts that the reader is on the closing </page> tag and steps
 * past it.
 */
public function assertPageEnd() {
	$this->assertNodeEnd( "page" );
}
/**
 * Checks and skips the tags that represent the properties of a revision:
 * id, optional parentid, timestamp, contributor, comment, origin (only for
 * schema versions from XML_DUMP_SCHEMA_VERSION_11 on), model, format,
 * an optional early text tag, and sha1.
 *
 * @param int $id Id of the revision
 * @param string $summary Summary of the revision
 * @param string $text_sha1 The base36 SHA-1 of the revision's text. If empty,
 *   an empty <sha1/> tag is expected instead.
 * @param string $hasEarlyText Whether a text tag is expected before the <sha1> tag.
 *   Must be one of 'yes', 'no', or 'maybe'.
 * @param int|bool $parentid (optional) id of the parent revision
 * @param string $model The expected content model id (default: CONTENT_MODEL_WIKITEXT)
 * @param string $format The expected format model id (default: CONTENT_FORMAT_WIKITEXT)
 * @param bool &$foundText Output, whether a text tag was found before the SHA1 tag.
 *   If this returns false, the text tag should be the next tag after the method returns.
 */
public function assertRevisionProperties( $id, $summary,
	$text_sha1, $hasEarlyText = 'maybe', $parentid = false,
	$model = CONTENT_MODEL_WIKITEXT, $format = CONTENT_FORMAT_WIKITEXT,
	&$foundText = ''
) {
	$this->assertTextNode( "id", $id );
	if ( $parentid !== false ) {
		$this->assertTextNode( "parentid", $parentid );
	}
	$this->assertTextNode( "timestamp", false );

	// Contributor details are only checked for presence, not for content.
	$this->assertNodeStart( "contributor" );
	foreach ( [ "username", "id" ] as $contributorTag ) {
		$this->assertTextNode( $contributorTag, false );
	}
	$this->assertNodeEnd( "contributor" );

	$this->assertTextNode( "comment", $summary );

	$hasOrigin = $this->schemaVersion >= XML_DUMP_SCHEMA_VERSION_11;
	if ( $hasOrigin ) {
		$this->assertTextNode( "origin", false );
	}

	$this->assertTextNode( "model", $model );
	$this->assertTextNode( "format", $format );

	// 'yes' demands a text tag here; 'maybe' accepts one if it happens to be next.
	$foundText = $hasEarlyText === 'yes'
		|| ( $hasEarlyText === 'maybe' && $this->xml->name == "text" );
	if ( $foundText ) {
		$this->assertNodeStart( "text", false );
		// Jump over the whole text element without inspecting its content.
		$this->xml->next();
		$this->skipWhitespace();
	}

	if ( $text_sha1 ) {
		$this->assertTextNode( "sha1", $text_sha1 );
		return;
	}
	$this->assertEmptyNode( "sha1" );
}
/**
 * Asserts that the reader is at a <revision> element, checks its
 * properties and text, and steps over it including trailing whitespace.
 *
 * @param int $id Id of the revision
 * @param string $summary Summary of the revision
 * @param int $text_id Id of the revision's text
 * @param int $text_bytes Number of bytes in the revision's text
 * @param string $text_sha1 The base36 SHA-1 of the revision's text
 * @param string|bool $text (optional) The revision's string, or false to check for a
 *   revision stub
 * @param int|bool $parentid (optional) id of the parent revision
 * @param string $model The expected content model id (default: CONTENT_MODEL_WIKITEXT)
 * @param string $format The expected format model id (default: CONTENT_FORMAT_WIKITEXT)
 */
public function assertRevision( $id, $summary, $text_id, $text_bytes,
	$text_sha1, $text = false, $parentid = false,
	$model = CONTENT_MODEL_WIKITEXT, $format = CONTENT_FORMAT_WIKITEXT
) {
	$this->assertNodeStart( "revision" );

	// Filled in by reference: true when the text tag preceded <sha1>.
	$sawEarlyText = false;
	$this->assertRevisionProperties(
		$id, $summary, $text_sha1, 'maybe', $parentid, $model, $format, $sawEarlyText
	);

	// Without an early text tag, the text tag must follow the properties.
	if ( !$sawEarlyText ) {
		$this->assertText( $id, $text_id, $text_bytes, $text );
	}

	$this->assertNodeEnd( "revision" );
	$this->skipWhitespace();
}
/**
 * Asserts that the xml reader is at a <text> element, checks it, and skips over it.
 *
 * Two shapes are supported: a stub (empty <text/> tag carrying an "id" attribute and
 * no "xml:space" attribute) and a full dump (<text xml:space="preserve"> with content).
 *
 * @param int $id Id of the revision, only used in failure messages
 * @param int $text_id Expected value of the "id" attribute; only checked for stubs
 * @param int|bool $text_bytes Expected value of the "bytes" attribute, or false to
 *   skip that check
 * @param string|bool $text The expected text content, or false to expect a stub
 */
public function assertText( $id, $text_id, $text_bytes, $text ) {
	$this->assertNodeStart( "text", false );
	if ( $text_bytes !== false ) {
		// Expected value first, so PHPUnit labels a failure correctly.
		Assert::assertEquals( $text_bytes, $this->xml->getAttribute( "bytes" ),
			"Attribute 'bytes' of revision " . $id );
	}

	if ( $text === false ) {
		// Testing for a stub
		Assert::assertEquals( $text_id, $this->xml->getAttribute( "id" ),
			"Text id of revision " . $id );
		Assert::assertNull( $this->xml->getAttribute( "xml:space" ),
			"xml:space attribute should not be present" );
		$this->assertEmptyNode( "text" );
	} else {
		// Testing for a real dump
		Assert::assertEquals( "preserve", $this->xml->getAttribute( "xml:space" ),
			"xml:space=preserve should be present" );
		Assert::assertTrue( $this->xml->read(), "Skipping text start tag" );
		Assert::assertEquals( $text, $this->xml->value, "Text of revision " . $id );
		Assert::assertTrue( $this->xml->read(), "Skipping past text" );
		$this->assertNodeEnd( "text" );
		$this->skipWhitespace();
	}
}
/**
 * Asserts that the reader is at the beginning of a <logitem> element and
 * steps over it while checking its contents.
 *
 * @param int $id Id of the log entry
 * @param string $user_name User name of the log entry's performer
 * @param int $user_id User id of the log entry's performer
 * @param string|null $comment Comment of the log entry. If null, no <comment>
 *   element is asserted at all.
 * @param string $type Type of the log entry
 * @param string $subtype Subtype of the log entry
 * @param string $title Title of the log entry's target
 * @param array $parameters (optional) unserialized data accompanying the log entry
 */
public function assertLogItem( $id, $user_name, $user_id, $comment, $type,
	$subtype, $title, $parameters = []
) {
	$this->assertNodeStart( "logitem" );

	$this->assertTextNode( "id", $id );
	$this->assertTextNode( "timestamp", false );

	$this->assertNodeStart( "contributor" );
	$this->assertTextNode( "username", $user_name );
	$this->assertTextNode( "id", $user_id );
	$this->assertNodeEnd( "contributor" );

	if ( $comment !== null ) {
		$this->assertTextNode( "comment", $comment );
	}

	$expectedByTag = [
		"type" => $type,
		"action" => $subtype,
		"logtitle" => $title,
	];
	foreach ( $expectedByTag as $tag => $expected ) {
		$this->assertTextNode( $tag, $expected );
	}

	$this->assertNodeStart( "params" );
	// NOTE(review): unserialize() on file content is only acceptable because the
	// dump is presumably produced by the test itself - confirm before reusing elsewhere.
	$actualParameters = unserialize( $this->xml->value );
	Assert::assertEquals( $parameters, $actualParameters );
	Assert::assertTrue( $this->xml->read(), "Skipping past processed text of params" );
	$this->assertNodeEnd( "params" );

	$this->assertNodeEnd( "logitem" );
}
/**
 * Returns the XMLReader's current line number for reporting.
 *
 * @param XMLReader|null $xml Reader to inspect; defaults to the reader of this asserter
 *
 * @return int The line number of the current node, or 0 if the reader is not
 *   positioned on a node or the node cannot be expanded.
 */
public function getLineNumber( ?XMLReader $xml = null ) {
	$xml ??= $this->xml;

	if ( $xml->nodeType == XMLReader::NONE ) {
		return 0;
	}

	// expand() may return false on error; this helper only feeds failure
	// messages, so fall back to 0 rather than raising a fatal error.
	$node = $xml->expand();
	if ( !$node ) {
		return 0;
	}

	return $node->getLineNo();
}
/**
 * Opens an XML template file and compares it to the XML structure at the current position of
 * this asserter.
 *
 * If the outer-most tag of the template file is <test:data>, that tag is
 * ignored during comparison. This allows template files to contain arbitrary snippets of XML.
 * When the tag <test:end/> is encountered in the template, the comparison is ended.
 * This allows template files to be written to match the beginning of a structure,
 * without the need for subsequent contents to match.
 *
 * Whitespace and comments are skipped on both sides before nodes are compared.
 * For each remaining node the type, name, value and attributes must match.
 *
 * The contents of $file are subject to variable substitution based on
 * the values provided via setVarMapping().
 *
 * @param string $file Name of file to analyze
 */
public function assertDOM( $file ) {
	$exXml = new XMLReader();

	Assert::assertTrue( $exXml->open( $file ),
		"Opening fixture file $file via XMLReader failed" );

	// The finally block makes sure the fixture reader is released even when
	// one of the assertions below fails.
	try {
		$line = 0;
		while ( true ) {
			$line = max( $line, $this->getLineNumber( $exXml ) );
			$location = "[$file line $line] ";

			// Skip nodes of the template that take no part in the comparison.
			while ( $exXml->nodeType == XMLReader::NONE
				|| $exXml->nodeType == XMLReader::WHITESPACE
				|| $exXml->nodeType == XMLReader::SIGNIFICANT_WHITESPACE
				|| $exXml->nodeType == XMLReader::COMMENT
				|| ( $exXml->nodeType == XMLReader::ELEMENT && $exXml->name === 'test:data' )
			) {
				// Reached the end of the template file, so we are done here.
				if ( !$exXml->read() ) {
					break 2;
				}

				// Reached the end of the test data, so we are done here.
				if ( $exXml->nodeType == XMLReader::END_ELEMENT && $exXml->name === 'test:data' ) {
					break 2;
				}
			}

			// Skip the corresponding insignificant nodes of the actual document.
			while ( $this->xml->nodeType == XMLReader::NONE
				|| $this->xml->nodeType == XMLReader::WHITESPACE
				|| $this->xml->nodeType == XMLReader::SIGNIFICANT_WHITESPACE
				|| $this->xml->nodeType == XMLReader::COMMENT
			) {
				Assert::assertTrue( $this->xml->read(), $location . 'Document ended unexpectedly' );
			}

			// End comparison early, ignore the rest of the contents of the template file.
			if ( $exXml->nodeType == XMLReader::ELEMENT && $exXml->name === 'test:end' ) {
				break;
			}

			// The template reader may have advanced while skipping; refresh the location.
			$line = max( $line, $this->getLineNumber( $exXml ) );
			$location = "[$file line $line] ";

			Assert::assertSame( $exXml->nodeType, $this->xml->nodeType, $location . 'Node type' );
			Assert::assertSame( $exXml->name, $this->xml->name, $location . 'Node name' );
			Assert::assertSame(
				$exXml->hasValue,
				$this->xml->hasValue,
				$location . 'Node has value?'
			);
			Assert::assertSame(
				$exXml->hasAttributes,
				$this->xml->hasAttributes,
				$location . 'Node has attributes?'
			);

			if ( $exXml->hasValue ) {
				$expValue = $this->resolveVars( $exXml->value );
				$actValue = $this->resolveVars( $this->xml->value );
				Assert::assertSame( $expValue, $actValue, $location . 'Node value' );
			}

			if ( $exXml->hasAttributes ) {
				$expectedAttributes = $this->getAttributeArray( $exXml );
				$actualAttributes = $this->getAttributeArray( $this->xml );

				Assert::assertEquals( $expectedAttributes, $actualAttributes, $location . 'Attributes' );
			}

			// Reached the end of the template file, so we are done here.
			if ( !$exXml->read() ) {
				break;
			}

			// Reached the end of the test data, so we are done here.
			if ( $exXml->nodeType == XMLReader::END_ELEMENT && $exXml->name === 'test:data' ) {
				break;
			}

			Assert::assertTrue( $this->xml->read(), $location . 'Document ended unexpectedly' );
		}
	} finally {
		$exXml->close();
	}
}
/**
 * Removes XML comments and any <test:...> opening or closing tags
 * from a string.
 *
 * @param string $text
 *
 * @return string
 */
public function stripTestTags( $text ) {
	$patterns = [
		// XML comments, possibly spanning several lines
		'@<!--.*?-->@s',
		// opening and closing tags in the test: namespace
		'@</?test:[^>]+>@',
	];

	foreach ( $patterns as $pattern ) {
		$text = preg_replace( $pattern, '', $text );
	}

	return $text;
}
/**
 * Collects the attributes of the element the reader is currently on.
 * Variable substitution (see setVarMapping()) is applied to each value.
 *
 * The reader is moved back onto the owning element afterwards, so callers
 * can keep inspecting that element (e.g. isEmptyElement) after this call.
 *
 * @param XMLReader|null $xml Reader to inspect; defaults to the reader of this asserter
 *
 * @return array|null Attribute values keyed by attribute name, an empty array if the
 *   element has no attributes, or null if the reader is not on an element start.
 */
private function getAttributeArray( ?XMLReader $xml = null ) {
	$xml ??= $this->xml;

	if ( $xml->nodeType !== XMLReader::ELEMENT ) {
		return null;
	}

	if ( !$xml->hasAttributes ) {
		return [];
	}

	$attr = [];
	while ( $xml->moveToNextAttribute() ) {
		$attr[$xml->name] = $this->resolveVars( $xml->value );
	}

	// Iterating leaves the cursor on the last attribute; put it back on the element.
	$xml->moveToElement();

	return $attr;
}
/**
 * Replaces every placeholder registered via setVarMapping() in the
 * given text with its mapped value.
 *
 * @param string $text
 *
 * @return string
 */
public function resolveVars( $text ) {
	$placeholders = array_keys( $this->varMapping );
	$replacements = array_values( $this->varMapping );

	return str_replace( $placeholders, $replacements, $text );
}
/**
 * Registers a variable for substitution by resolveVars(). Occurrences
 * of the name wrapped in double curly braces are replaced by $value.
 *
 * @param string $name
 * @param string $value
 */
public function setVarMapping( $name, $value ) {
	$this->varMapping['{{' . $name . '}}'] = $value;
}
/**
 * Returns the schema version this asserter was constructed with.
 *
 * @return string
 */
public function getSchemaVersion() {
	return $this->schemaVersion;
}