tests/phpunit/maintenance/DumpTestCase.php

   1 <?php
   2
   3 /**
   4  * Base TestCase for dumps
   5  */
   6 abstract class DumpTestCase extends MediaWikiLangTestCase {
   7
        /**
         * Exception to be rethrown once in sound PHPUnit surrounding
         *
         * As the current MediaWikiTestCase::run is not robust enough to recover
         * from thrown exceptions directly, we cannot throw from within
         * self::addDBData, although it would be appropriate. Hence, we catch the
         * exception and store it until we are in setUp and may finally rethrow
         * the exception without crashing the test suite.
         *
         * self::setUp rethrows this value whenever it is not null.
         *
         * @var Exception|null
         */
        protected $exceptionFromAddDBData = null;
  20
        /**
         * Holds the XMLReader used for analyzing an XML dump
         *
         * Assigned by self::assertDumpStart and closed by self::assertDumpEnd;
         * starts out as null.
         *
         * @var XMLReader|null
         */
        protected $xml = null;
  27
  28         /**
  29          * Adds a revision to a page, while returning the resuting revision's id
  30          *
  31          * @param Page $page Page to add the revision to
  32          * @param string $text Revisions text
  33          * @param string $summary Revisions summary
  34          * @param string $model The model ID (defaults to wikitext)
  35          *
  36          * @throws MWException
  37          * @return array
  38          */
  39         protected function addRevision( Page $page, $text, $summary, $model = CONTENT_MODEL_WIKITEXT ) {
  40                 $status = $page->doEditContent(
  41                         ContentHandler::makeContent( $text, $page->getTitle(), $model ),
  42                         $summary
  43                 );
  44
  45                 if ( $status->isGood() ) {
  46                         $value = $status->getValue();
  47                         $revision = $value['revision'];
  48                         $revision_id = $revision->getId();
  49                         $text_id = $revision->getTextId();
  50
  51                         if ( ( $revision_id > 0 ) && ( $text_id > 0 ) ) {
  52                                 return [ $revision_id, $text_id ];
  53                         }
  54                 }
  55
  56                 throw new MWException( "Could not determine revision id ("
  57                         . $status->getWikiText( false, false, 'en' ) . ")" );
  58         }
  59
  60         /**
  61          * gunzips the given file and stores the result in the original file name
  62          *
  63          * @param string $fname Filename to read the gzipped data from and stored
  64          *   the gunzipped data into
  65          */
  66         protected function gunzip( $fname ) {
  67                 $gzipped_contents = file_get_contents( $fname );
  68                 if ( $gzipped_contents === false ) {
  69                         $this->fail( "Could not get contents of $fname" );
  70                 }
  71
  72                 $contents = gzdecode( $gzipped_contents );
  73
  74                 $this->assertEquals(
  75                         strlen( $contents ),
  76                         file_put_contents( $fname, $contents ),
  77                         '# bytes written'
  78                 );
  79         }
  80
  81         /**
  82          * Default set up function.
  83          *
  84          * Clears $wgUser, and reports errors from addDBData to PHPUnit
  85          */
  86         protected function setUp() {
  87                 parent::setUp();
  88
  89                 // Check if any Exception is stored for rethrowing from addDBData
  90                 // @see self::exceptionFromAddDBData
  91                 if ( $this->exceptionFromAddDBData !== null ) {
  92                         throw $this->exceptionFromAddDBData;
  93                 }
  94
  95                 $this->setMwGlobals( 'wgUser', new User() );
  96         }
  97
  98         /**
  99          * Checks for test output consisting only of lines containing ETA announcements
 100          */
 101         function expectETAOutput() {
 102                 // Newer PHPUnits require assertion about the output using PHPUnit's own
 103                 // expectOutput[...] functions. However, the PHPUnit shipped prediactes
 104                 // do not allow to check /each/ line of the output using /readable/ REs.
 105                 // So we ...
 106
 107                 // 1. ... add a dummy output checking to make PHPUnit not complain
 108                 //    about unchecked test output
 109                 $this->expectOutputRegex( '//' );
 110
 111                 // 2. Do the real output checking on our own.
 112                 $lines = explode( "\n", $this->getActualOutput() );
 113                 $this->assertGreaterThan( 1, count( $lines ), "Minimal lines of produced output" );
 114                 $this->assertEquals( '', array_pop( $lines ), "Output ends in LF" );
 115                 $timestamp_re = "[0-9]{4}-[01][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-6][0-9]";
 116                 foreach ( $lines as $line ) {
 117                         $this->assertRegExp(
 118                                 "/$timestamp_re: .* \(ID [0-9]+\) [0-9]* pages .*, [0-9]* revs .*, ETA/",
 119                                 $line
 120                         );
 121                 }
 122         }
 123
 124         /**
 125          * Step the current XML reader until node end of given name is found.
 126          *
 127          * @param string $name Name of the closing element to look for
 128          *   (e.g.: "mediawiki" when looking for </mediawiki>)
 129          *
 130          * @return bool True if the end node could be found. false otherwise.
 131          */
 132         protected function skipToNodeEnd( $name ) {
 133                 while ( $this->xml->read() ) {
 134                         if ( $this->xml->nodeType == XMLReader::END_ELEMENT &&
 135                                 $this->xml->name == $name
 136                         ) {
 137                                 return true;
 138                         }
 139                 }
 140
 141                 return false;
 142         }
 143
 144         /**
 145          * Step the current XML reader to the first element start after the node
 146          * end of a given name.
 147          *
 148          * @param string $name Name of the closing element to look for
 149          *   (e.g.: "mediawiki" when looking for </mediawiki>)
 150          *
 151          * @return bool True if new element after the closing of $name could be
 152          *   found. false otherwise.
 153          */
 154         protected function skipPastNodeEnd( $name ) {
 155                 $this->assertTrue( $this->skipToNodeEnd( $name ),
 156                         "Skipping to end of $name" );
 157                 while ( $this->xml->read() ) {
 158                         if ( $this->xml->nodeType == XMLReader::ELEMENT ) {
 159                                 return true;
 160                         }
 161                 }
 162
 163                 return false;
 164         }
 165
 166         /**
 167          * Opens an XML file to analyze and optionally skips past siteinfo.
 168          *
 169          * @param string $fname Name of file to analyze
 170          * @param bool $skip_siteinfo (optional) If true, step the xml reader
 171          *   to the first element after </siteinfo>
 172          */
 173         protected function assertDumpStart( $fname, $skip_siteinfo = true ) {
 174                 $this->xml = new XMLReader();
 175                 $this->assertTrue( $this->xml->open( $fname ),
 176                         "Opening temporary file $fname via XMLReader failed" );
 177                 if ( $skip_siteinfo ) {
 178                         $this->assertTrue( $this->skipPastNodeEnd( "siteinfo" ),
 179                                 "Skipping past end of siteinfo" );
 180                 }
 181         }
 182
 183         /**
 184          * Asserts that the xml reader is at the final closing tag of an xml file and
 185          * closes the reader.
 186          *
 187          * @param string $name (optional) the name of the final tag
 188          *   (e.g.: "mediawiki" for </mediawiki>)
 189          */
 190         protected function assertDumpEnd( $name = "mediawiki" ) {
 191                 $this->assertNodeEnd( $name, false );
 192                 if ( $this->xml->read() ) {
 193                         $this->skipWhitespace();
 194                 }
 195                 $this->assertEquals( $this->xml->nodeType, XMLReader::NONE,
 196                         "No proper entity left to parse" );
 197                 $this->xml->close();
 198         }
 199
 200         /**
 201          * Steps the xml reader over white space
 202          */
 203         protected function skipWhitespace() {
 204                 $cont = true;
 205                 while ( $cont && ( ( $this->xml->nodeType == XMLReader::WHITESPACE )
 206                         || ( $this->xml->nodeType == XMLReader::SIGNIFICANT_WHITESPACE ) ) ) {
 207                         $cont = $this->xml->read();
 208                 }
 209         }
 210
 211         /**
 212          * Asserts that the xml reader is at an element of given name, and optionally
 213          * skips past it.
 214          *
 215          * @param string $name The name of the element to check for
 216          *   (e.g.: "mediawiki" for <mediawiki>)
 217          * @param bool $skip (optional) if true, skip past the found element
 218          */
 219         protected function assertNodeStart( $name, $skip = true ) {
 220                 $this->assertEquals( $name, $this->xml->name, "Node name" );
 221                 $this->assertEquals( XMLReader::ELEMENT, $this->xml->nodeType, "Node type" );
 222                 if ( $skip ) {
 223                         $this->assertTrue( $this->xml->read(), "Skipping past start tag" );
 224                 }
 225         }
 226
 227         /**
 228          * Asserts that the xml reader is at an closing element of given name, and optionally
 229          * skips past it.
 230          *
 231          * @param string $name The name of the closing element to check for
 232          *   (e.g.: "mediawiki" for </mediawiki>)
 233          * @param bool $skip (optional) if true, skip past the found element
 234          */
 235         protected function assertNodeEnd( $name, $skip = true ) {
 236                 $this->assertEquals( $name, $this->xml->name, "Node name" );
 237                 $this->assertEquals( XMLReader::END_ELEMENT, $this->xml->nodeType, "Node type" );
 238                 if ( $skip ) {
 239                         $this->assertTrue( $this->xml->read(), "Skipping past end tag" );
 240                 }
 241         }
 242
 243         /**
 244          * Asserts that the xml reader is at an element of given tag that contains a given text,
 245          * and skips over the element.
 246          *
 247          * @param string $name The name of the element to check for
 248          *   (e.g.: "mediawiki" for <mediawiki>...</mediawiki>)
 249          * @param string|bool $text If string, check if it equals the elements text.
 250          *   If false, ignore the element's text
 251          * @param bool $skip_ws (optional) if true, skip past white spaces that trail the
 252          *   closing element.
 253          */
 254         protected function assertTextNode( $name, $text, $skip_ws = true ) {
 255                 $this->assertNodeStart( $name );
 256
 257                 if ( $text !== false ) {
 258                         $this->assertEquals( $text, $this->xml->value, "Text of node " . $name );
 259                 }
 260                 $this->assertTrue( $this->xml->read(), "Skipping past processed text of " . $name );
 261                 $this->assertNodeEnd( $name );
 262
 263                 if ( $skip_ws ) {
 264                         $this->skipWhitespace();
 265                 }
 266         }
 267
 268         /**
 269          * Asserts that the xml reader is at the start of a page element and skips over the first
 270          * tags, after checking them.
 271          *
 272          * Besides the opening page element, this function also checks for and skips over the
 273          * title, ns, and id tags. Hence after this function, the xml reader is at the first
 274          * revision of the current page.
 275          *
 276          * @param int $id Id of the page to assert
 277          * @param int $ns Number of namespage to assert
 278          * @param string $name Title of the current page
 279          */
 280         protected function assertPageStart( $id, $ns, $name ) {
 281
 282                 $this->assertNodeStart( "page" );
 283                 $this->skipWhitespace();
 284
 285                 $this->assertTextNode( "title", $name );
 286                 $this->assertTextNode( "ns", $ns );
 287                 $this->assertTextNode( "id", $id );
 288         }
 289
 290         /**
 291          * Asserts that the xml reader is at the page's closing element and skips to the next
 292          * element.
 293          */
 294         protected function assertPageEnd() {
 295                 $this->assertNodeEnd( "page" );
 296                 $this->skipWhitespace();
 297         }
 298
 299         /**
 300          * Asserts that the xml reader is at a revision and checks its representation before
 301          * skipping over it.
 302          *
 303          * @param int $id Id of the revision
 304          * @param string $summary Summary of the revision
 305          * @param int $text_id Id of the revision's text
 306          * @param int $text_bytes Number of bytes in the revision's text
 307          * @param string $text_sha1 The base36 SHA-1 of the revision's text
 308          * @param string|bool $text (optional) The revision's string, or false to check for a
 309          *            revision stub
 310          * @param int|bool $parentid (optional) id of the parent revision
 311          * @param string $model The expected content model id (default: CONTENT_MODEL_WIKITEXT)
 312          * @param string $format The expected format model id (default: CONTENT_FORMAT_WIKITEXT)
 313          */
 314         protected function assertRevision( $id, $summary, $text_id, $text_bytes,
 315                 $text_sha1, $text = false, $parentid = false,
 316                 $model = CONTENT_MODEL_WIKITEXT, $format = CONTENT_FORMAT_WIKITEXT
 317         ) {
 318                 $this->assertNodeStart( "revision" );
 319                 $this->skipWhitespace();
 320
 321                 $this->assertTextNode( "id", $id );
 322                 if ( $parentid !== false ) {
 323                         $this->assertTextNode( "parentid", $parentid );
 324                 }
 325                 $this->assertTextNode( "timestamp", false );
 326
 327                 $this->assertNodeStart( "contributor" );
 328                 $this->skipWhitespace();
 329                 $this->assertTextNode( "ip", false );
 330                 $this->assertNodeEnd( "contributor" );
 331                 $this->skipWhitespace();
 332
 333                 $this->assertTextNode( "comment", $summary );
 334                 $this->skipWhitespace();
 335
 336                 $this->assertTextNode( "model", $model );
 337                 $this->skipWhitespace();
 338
 339                 $this->assertTextNode( "format", $format );
 340                 $this->skipWhitespace();
 341
 342                 if ( $this->xml->name == "text" ) {
 343                         // note: <text> tag may occur here or at the very end.
 344                         $text_found = true;
 345                         $this->assertText( $id, $text_id, $text_bytes, $text );
 346                 } else {
 347                         $text_found = false;
 348                 }
 349
 350                 $this->assertTextNode( "sha1", $text_sha1 );
 351
 352                 if ( !$text_found ) {
 353                         $this->assertText( $id, $text_id, $text_bytes, $text );
 354                 }
 355
 356                 $this->assertNodeEnd( "revision" );
 357                 $this->skipWhitespace();
 358         }
 359
 360         protected function assertText( $id, $text_id, $text_bytes, $text ) {
 361                 $this->assertNodeStart( "text", false );
 362                 if ( $text_bytes !== false ) {
 363                         $this->assertEquals( $this->xml->getAttribute( "bytes" ), $text_bytes,
 364                                 "Attribute 'bytes' of revision " . $id );
 365                 }
 366
 367                 if ( $text === false ) {
 368                         // Testing for a stub
 369                         $this->assertEquals( $this->xml->getAttribute( "id" ), $text_id,
 370                                 "Text id of revision " . $id );
 371                         $this->assertFalse( $this->xml->hasValue, "Revision has text" );
 372                         $this->assertTrue( $this->xml->read(), "Skipping text start tag" );
 373                         if ( ( $this->xml->nodeType == XMLReader::END_ELEMENT )
 374                                 && ( $this->xml->name == "text" )
 375                         ) {
 376
 377                                 $this->xml->read();
 378                         }
 379                         $this->skipWhitespace();
 380                 } else {
 381                         // Testing for a real dump
 382                         $this->assertTrue( $this->xml->read(), "Skipping text start tag" );
 383                         $this->assertEquals( $text, $this->xml->value, "Text of revision " . $id );
 384                         $this->assertTrue( $this->xml->read(), "Skipping past text" );
 385                         $this->assertNodeEnd( "text" );
 386                         $this->skipWhitespace();
 387                 }
 388         }
 389 }