3 namespace MediaWiki\Tests\Maintenance
;
6 use MediaWiki\Maintenance\TextPassDumper
;
7 use MediaWiki\Revision\RevisionRecord
;
8 use MediaWiki\Revision\SlotRecord
;
9 use MediaWikiLangTestCase
;
14 * Tests for TextPassDumper that rely on the database
16 * Some of these tests use the old constructor for TextPassDumper
17 * and the dump() function, while others use the new loadWithArgv( $args )
18 * function and execute(). This is to ensure both the old and new methods
23 * @covers \MediaWiki\Maintenance\TextPassDumper
25 class TextPassDumperDatabaseTest
extends DumpTestCase
{
27 use PageDumpTestDataTrait
;
29 public function addDBData() {
// Creates the fixture pages for this test class by handing the test sysop's
// user object to addTestPages().
// NOTE(review): presumably that user becomes the author of the fixture
// revisions — confirm where addTestPages() is defined.
32 $this->addTestPages( $this->getTestSysop()->getUser() );
public static function schemaVersionProvider() {
	// Data provider for testFullTextPlain(): yields one single-element
	// argument list per entry of XmlDumpWriter::$supportedSchemas, so the
	// test runs once for every supported schema version.
	//
	// Declared static because it only reads a static property and never
	// touches instance state; this matches bufferSizeProvider() further down
	// and is what PHPUnit 10+ requires of data providers.
	foreach ( XmlDumpWriter::$supportedSchemas as $schemaVersion ) {
		yield [ $schemaVersion ];
	}
}
42 * @dataProvider schemaVersionProvider
public function testFullTextPlain( $schemaVersion ) {
	// Runs the text pass over the 'AllStubs' stub for the given schema version
	// using the constructor + dump() entry points, then validates the result
	// against the XML schema and the 'SiteInfo' / 'AllText' templates.

	// Arrange: stub input plus a temporary file that receives the full dump
	$stubFile = $this->setUpStub( 'AllStubs', $schemaVersion );
	$fullDumpFile = $this->getNewTempFile();

	$argv = [
		"--stub=file:" . $stubFile,
		"--output=file:" . $fullDumpFile,
		'--schema-version',
		$schemaVersion,
	];
	$textPass = new TextPassDumper( $argv );
	$textPass->reporting = false;
	$textPass->setDB( $this->getDb() );

	// Act: produce the dump
	$textPass->dump( WikiExporter::FULL, WikiExporter::TEXT );

	// Assert: schema validity first, then the actual content
	$schemaPath = $this->getXmlSchemaPath( $schemaVersion );
	$this->assertDumpSchema( $fullDumpFile, $schemaPath );

	$dumpAsserter = $this->getDumpAsserter( $schemaVersion );
	$this->setSiteVarMappings( $dumpAsserter );
	$this->setAllRevisionsVarMappings( $dumpAsserter );

	$headTemplate = $this->getDumpTemplatePath( 'SiteInfo', $schemaVersion );
	$bodyTemplate = $this->getDumpTemplatePath( 'AllText', $schemaVersion );

	$dumpAsserter->open( $fullDumpFile );
	$dumpAsserter->assertDumpHead( $headTemplate );
	$dumpAsserter->assertDOM( $bodyTemplate );
	$dumpAsserter->assertDumpEnd();
}
73 public function testPrefetchPlain() {
// Runs a full text dump with the dumper's prefetch source replaced by a mock,
// then checks the output against the schema and the 'SiteInfo' / 'AllText'
// templates, with var mappings overridden by the strings the mock returns.
// Uses the globally configured $wgXmlDumpSchemaVersion throughout.
74 global $wgXmlDumpSchemaVersion;
// Fixture revisions keyed by their revision id, so that the prefetch callback
// below can look a revision up by $revid.
76 /** @var RevisionRecord[] $revisions */
78 $this->rev1_1
->getId() => $this->rev1_1
,
79 $this->rev2_1
->getId() => $this->rev2_1
,
80 $this->rev2_2
->getId() => $this->rev2_2
,
81 $this->rev2_3
->getId() => $this->rev2_3
,
82 $this->rev2_4
->getId() => $this->rev2_4
,
83 $this->rev4_1
->getId() => $this->rev4_1
,
// Stand-in for the mocked prefetch(): builds a recognisable string from
// (page id, revision id, role), padded with '*' to the size of the slot.
86 $getPrefetchText = static function ( $pageid, $revid, $role ) use ( $revisions ) {
87 $rev = $revisions[$revid];
88 $slot = $rev->getSlot( $role );
90 // NOTE: TextPassDumper does a check on the string length,
91 // so we have to pad to match the original length. The hash is not checked.
92 return str_pad( "Prefetch: ({$pageid}/{$revid}/$role)", $slot->getSize(), '*' );
// BaseDump mock built without running the original constructor; only
// prefetch() is replaced, and it delegates to the closure above.
96 $prefetchMock = $this->getMockBuilder( BaseDump
::class )
97 ->onlyMethods( [ 'prefetch' ] )
98 ->disableOriginalConstructor()
100 $prefetchMock->method( 'prefetch' )
101 ->willReturnCallback( $getPrefetchText );
103 // Setting up of the dump
104 $nameStub = $this->setUpStub( 'AllStubs', $wgXmlDumpSchemaVersion );
105 $nameFull = $this->getNewTempFile();
107 $dumper = new TextPassDumper( [ "--stub=file:" . $nameStub,
108 "--output=file:" . $nameFull ] );
// Inject the mock as the dumper's prefetch source and switch its
// reporting flag off before running.
110 $dumper->prefetch
= $prefetchMock;
111 $dumper->reporting
= false;
112 $dumper->setDB( $this->getDb() );
114 // Performing the dump
115 $dumper->dump( WikiExporter
::FULL
, WikiExporter
::TEXT
);
117 // Checking the dumped data
118 $this->assertDumpSchema( $nameFull, $this->getXmlSchemaPath( $wgXmlDumpSchemaVersion ) );
120 $asserter = $this->getDumpAsserter( $wgXmlDumpSchemaVersion );
121 $this->setSiteVarMappings( $asserter );
122 $this->setAllRevisionsVarMappings( $asserter );
124 $siteInfoTemplate = $this->getDumpTemplatePath( 'SiteInfo', $wgXmlDumpSchemaVersion );
125 $pagesTemplate = $this->getDumpTemplatePath( 'AllText', $wgXmlDumpSchemaVersion );
// Override var mappings with exactly the strings the prefetch mock produces:
// one mapping per fixture revision for the main slot, plus one for the 'aux'
// role of rev1_1. These calls come after setAllRevisionsVarMappings() above.
127 $asserter->setVarMapping(
129 $getPrefetchText( $this->rev1_1
->getPageId(), $this->rev1_1
->getId(), SlotRecord
::MAIN
)
131 $asserter->setVarMapping(
133 $getPrefetchText( $this->rev1_1
->getPageId(), $this->rev1_1
->getId(), 'aux' )
135 $asserter->setVarMapping(
137 $getPrefetchText( $this->rev2_1
->getPageId(), $this->rev2_1
->getId(), SlotRecord
::MAIN
)
139 $asserter->setVarMapping(
141 $getPrefetchText( $this->rev2_2
->getPageId(), $this->rev2_2
->getId(), SlotRecord
::MAIN
)
143 $asserter->setVarMapping(
145 $getPrefetchText( $this->rev2_3
->getPageId(), $this->rev2_3
->getId(), SlotRecord
::MAIN
)
147 $asserter->setVarMapping(
149 $getPrefetchText( $this->rev2_4
->getPageId(), $this->rev2_4
->getId(), SlotRecord
::MAIN
)
151 $asserter->setVarMapping(
153 $getPrefetchText( $this->rev4_1
->getPageId(), $this->rev4_1
->getId(), SlotRecord
::MAIN
)
156 $asserter->open( $nameFull );
157 $asserter->assertDumpHead( $siteInfoTemplate );
158 $asserter->assertDOM( $pagesTemplate );
159 $asserter->assertDumpEnd();
163 * Ensures that checkpoint dumps are used and written, by successively increasing the
164 * stub size and dumping until the duration crosses a threshold.
166 * @param string $checkpointFormat Either "file" for plain text or "gzip" for gzipped
169 private function checkpointHelper( $checkpointFormat = "file" ) {
// Overview:
//  (1) repeatedly build a stub of $iterations blocks and run the text pass,
//      growing $iterations until one run lasts at least $minDuration seconds,
//      so that the patched $checkpointAfter limit can be exceeded more than once;
//  (2) walk the files left in the output directory in sorted order and assert
//      the expected pages and revisions;
//  (3) require that more than one checkpoint file was produced.
170 global $wgXmlDumpSchemaVersion;
172 // Getting temporary names
173 $nameStub = $this->getNewTempFile();
174 $nameOutputDir = $this->getNewTempDirectory();
// NOTE: despite its name, $stderr is a handle on php://output, so whatever the
// dumper writes to it becomes regular PHP output (see expectETAOutput() at the
// end of this method).
176 $stderr = fopen( 'php://output', 'a' );
177 if ( $stderr === false ) {
178 $this->fail( "Could not open stream for stderr" );
181 $iterations = 32; // We'll start with that many iterations of revisions
182 // in stub. Make sure that the generated volume is above the buffer size
183 // set below. Otherwise, the checkpointing does not trigger.
185 $minDuration = 2; // We want the dump to take at least this many seconds
186 $checkpointAfter = 0.5; // Generate checkpoint after this many seconds
188 // Until a dump takes at least $minDuration seconds, perform a dump and check
189 // duration. If the dump did not take long enough increase the iteration
190 // count, to generate a bigger stub file next time.
191 while ( $lastDuration < $minDuration ) {
192 // Setting up the dump
// Every attempt starts from a freshly recreated, empty output directory.
193 wfRecursiveRemoveDir( $nameOutputDir );
194 $this->assertTrue( wfMkdirParents( $nameOutputDir ),
195 "Creating temporary output directory " );
196 $this->setUpStub( 'AllStubs', $wgXmlDumpSchemaVersion, $nameStub, $iterations );
197 $dumper = new TextPassDumper();
198 $dumper->loadWithArgv( [ "--stub=file:" . $nameStub,
199 "--output=" . $checkpointFormat . ":" . $nameOutputDir . "/full",
200 "--maxtime=1", // This is in minutes. Fixup is below
201 "--buffersize=32768", // The default of 32 iterations fill up 32 KiB about twice
202 "--checkpointfile=checkpoint-%s-%s.xml.gz" ] );
203 $dumper->setDB( $this->getDb() );
204 $dumper->maxTimeAllowed
= $checkpointAfter; // Patching maxTime from 1 minute
205 $dumper->stderr
= $stderr;
207 // The actual dump and taking time
208 $ts_before = microtime( true );
210 $ts_after = microtime( true );
211 $lastDuration = $ts_after - $ts_before;
213 // Handling increasing the iteration count for the stubs
214 if ( $lastDuration < $minDuration ) {
215 $old_iterations = $iterations;
216 if ( $lastDuration > 0.2 ) {
217 // lastDuration is big enough, to allow an educated guess
// Scale so that the next run is expected to take $minDuration + 0.5 seconds.
218 $factor = ( $minDuration +
0.5 ) / $lastDuration;
219 if ( ( $factor > 1.1 ) && ( $factor < 100 ) ) {
220 // educated guess is reasonable
221 $iterations = (int)( $iterations * $factor );
225 if ( $old_iterations == $iterations ) {
226 // Heuristics were not applied, so we just *2.
230 $this->assertLessThan( 50000, $iterations,
231 "Emergency stop against infinitely increasing iteration "
232 . "count ( last duration: $lastDuration )" );
236 // The dump (hopefully) did take long enough to produce more than one
238 // We now check all the checkpoint files for validity.
// scandir() also returns "." and ".."; those entries are skipped when the
// next file name is picked inside the loop below.
240 $files = scandir( $nameOutputDir );
241 $this->assertTrue( asort( $files ), "Sorting files in temporary directory" );
244 $checkpointFiles = 0;
246 $asserter = $this->getDumpAsserter();
248 // Each run of the following loop body tries to handle exactly 1 /page/ (not
249 // iteration of stub content). $i is only increased after having treated page 4.
250 for ( $i = 0; $i < $iterations; ) {
251 // 1. Assuring a file is opened and ready. Skipping across header if
253 if ( !$fileOpened ) {
254 $this->assertNotEmpty( $files, "No more existing dump files, "
255 . "but not yet all pages found" );
256 $fname = array_shift( $files );
257 while ( $fname == "." ||
$fname == ".." ) {
258 $this->assertNotEmpty( $files, "No more existing dump"
259 . " files, but not yet all pages found" );
260 $fname = array_shift( $files );
// The same path is opened right after gunzip(), so gunzip() presumably
// decompresses the file in place — confirm in DumpTestCase.
262 if ( $checkpointFormat == "gzip" ) {
263 $this->gunzip( $nameOutputDir . "/" . $fname );
265 $asserter->open( $nameOutputDir . "/" . $fname );
266 $asserter->assertDumpHead();
271 // 2. Performing a single page check
// The assertions below cover pageId1, pageId2 and pageId4 in turn. For stub
// iteration $i, page ids are the fixture ids shifted by $i * self::$numOfPages
// and revision ids by $i * self::$numOfRevs, mirroring the numbering that
// setUpStub() writes into the stub; text ids, sha1 and text are taken from the
// fixture revisions unshifted.
272 switch ( $lookingForPage ) {
275 $asserter->assertPageStart(
276 $this->pageId1 +
$i * self
::$numOfPages,
278 $this->pageTitle1
->getPrefixedText()
280 $asserter->assertRevision(
281 $this->rev1_1
->getId() +
$i * self
::$numOfRevs,
282 $this->rev1_1
->getComment()->text
,
283 $this->getSlotTextId( $this->rev1_1
->getSlot( SlotRecord
::MAIN
) ),
285 $this->rev1_1
->getSha1(),
286 $this->getSlotText( $this->rev1_1
->getSlot( SlotRecord
::MAIN
) )
288 $asserter->assertPageEnd();
295 $asserter->assertPageStart(
296 $this->pageId2 +
$i * self
::$numOfPages,
298 $this->pageTitle2
->getPrefixedText()
300 $asserter->assertRevision(
301 $this->rev2_1
->getId() +
$i * self
::$numOfRevs,
302 $this->rev2_1
->getComment()->text
,
303 $this->getSlotTextId( $this->rev2_1
->getSlot( SlotRecord
::MAIN
) ),
305 $this->rev2_1
->getSha1(),
306 $this->getSlotText( $this->rev2_1
->getSlot( SlotRecord
::MAIN
) )
// From rev2_2 onwards each assertRevision() call receives one more trailing
// argument: the shifted id of the preceding revision of the same page.
308 $asserter->assertRevision(
309 $this->rev2_2
->getId() +
$i * self
::$numOfRevs,
310 $this->rev2_2
->getComment()->text
,
311 $this->getSlotTextId( $this->rev2_2
->getSlot( SlotRecord
::MAIN
) ),
313 $this->rev2_2
->getSha1(),
314 $this->getSlotText( $this->rev2_2
->getSlot( SlotRecord
::MAIN
) ),
315 $this->rev2_1
->getId() +
$i * self
::$numOfRevs
317 $asserter->assertRevision(
318 $this->rev2_3
->getId() +
$i * self
::$numOfRevs,
319 $this->rev2_3
->getComment()->text
,
320 $this->getSlotTextId( $this->rev2_3
->getSlot( SlotRecord
::MAIN
) ),
322 $this->rev2_3
->getSha1(),
323 $this->getSlotText( $this->rev2_3
->getSlot( SlotRecord
::MAIN
) ),
324 $this->rev2_2
->getId() +
$i * self
::$numOfRevs
326 $asserter->assertRevision(
327 $this->rev2_4
->getId() +
$i * self
::$numOfRevs,
328 $this->rev2_4
->getComment()->text
,
329 $this->getSlotTextId( $this->rev2_4
->getSlot( SlotRecord
::MAIN
) ),
331 $this->rev2_4
->getSha1(),
332 $this->getSlotText( $this->rev2_4
->getSlot( SlotRecord
::MAIN
) ),
333 $this->rev2_3
->getId() +
$i * self
::$numOfRevs
335 $asserter->assertPageEnd();
342 $asserter->assertPageStart(
343 $this->pageId4 +
$i * self
::$numOfPages,
345 $this->pageTitle4
->getPrefixedText()
347 $asserter->assertRevision(
348 $this->rev4_1
->getId() +
$i * self
::$numOfRevs,
349 $this->rev4_1
->getComment()->text
,
350 $this->getSlotTextId( $this->rev4_1
->getSlot( SlotRecord
::MAIN
) ),
352 $this->rev4_1
->getSha1(),
353 $this->getSlotText( $this->rev4_1
->getSlot( SlotRecord
::MAIN
) ),
355 "BackupTextPassTestModel",
358 $asserter->assertPageEnd();
362 // We dealt with the whole iteration.
367 $this->fail( "Bad setting for lookingForPage ($lookingForPage)" );
370 $asserter->assertDumpEnd();
373 // Assuring we completely read all files ...
374 $this->assertFalse( $fileOpened, "Currently read file still open?" );
375 $this->assertSame( [], $files, "Remaining unchecked files" );
377 // ... and have dealt with more than one checkpoint file
378 $this->assertGreaterThan(
381 "expected more than 1 checkpoint to have been created. "
382 . "Checkpoint interval is $checkpointAfter seconds, maybe your computer is too fast?"
385 $this->expectETAOutput();
public function testCheckpointPlain() {
	// Checkpoint run with plain "file" output; "file" is also the helper's
	// default format, spelled out here for symmetry with the gzip variant.
	$this->checkpointHelper( "file" );
}
399 * Tests that checkpoint generation works in gzip format.
401 * We keep this test in addition to the simpler self::testCheckpointPlain, as there
402 * were once problems when the used sinks were DumpPipeOutputs.
404 * xmldumps-backup typically uses bzip2 instead of gzip. However, as bzip2 requires
405 * PHP extensions, we go for gzip instead, which triggers the same relevant code
406 * paths while still being testable on more systems.
public function testCheckpointGzip() {
	// checkHasGzip() runs first.
	// NOTE(review): presumably it skips the test when gzip support is
	// unavailable — confirm in DumpTestCase.
	$this->checkHasGzip();

	// Same checkpoint scenario as the plain variant, with gzipped sinks.
	$format = "gzip";
	$this->checkpointHelper( $format );
}
419 * Creates a stub file that is used for testing the text pass of dumps
421 * @param string $templateName
422 * @param string $schemaVersion
423 * @param string|null $outFile Absolute name of the file to write
424 * the stub into. If this parameter is null, a new temporary
425 * file is generated that is automatically removed upon tearDown.
426 * @param int $iterations (Optional) specifies how often the block
427 * of 3 pages should go into the stub file. The page and
428 * revision id increase further and further, while the text
429 * id of the first iteration is reused. The pages and revision
430 * of iteration > 1 have no corresponding representation in the database.
432 * @return string Absolute filename of the stub
434 private function setUpStub( $templateName, $schemaVersion, $outFile = null, $iterations = 1 ) {
// Builds the stub XML in memory: the stream opening produced by an
// XmlDumpWriter in WRITE_STUB mode, then the template resolved once per
// iteration with shifted page/revision ids, then the stream closing. The
// result is written to $outFile.
// A null $outFile is replaced by a fresh temporary file name.
435 $outFile ??
= $this->getNewTempFile();
437 $templatePath = $this->getDumpTemplatePath( $templateName, $schemaVersion );
439 $asserter = $this->getDumpAsserter( $schemaVersion );
440 $this->setAllRevisionsVarMappings( $asserter );
442 // Make revision point to a non-existent address, to test refreshing
444 $asserter->setVarMapping( 'rev4_1_main_location', 'tt:11111111' );
446 $writer = new XmlDumpWriter( XmlDumpWriter
::WRITE_STUB
, $schemaVersion );
447 $content = $writer->openStream();
// Per iteration, every page id is shifted by $i * self::$numOfPages and every
// revision id by $i * self::$numOfRevs before the template is resolved; the
// remaining mappings set up above stay the same for all iterations.
449 for ( $i = 0; $i < $iterations; $i++
) {
450 $asserter->setVarMapping( 'rev1_1_pageid', $this->pageId1 +
$i * self
::$numOfPages );
451 $asserter->setVarMapping( 'rev1_1_id', $this->rev1_1
->getId() +
$i * self
::$numOfRevs );
453 $asserter->setVarMapping( 'rev2_1_pageid', $this->pageId2 +
$i * self
::$numOfPages );
454 $asserter->setVarMapping( 'rev2_1_id', $this->rev2_1
->getId() +
$i * self
::$numOfRevs );
455 $asserter->setVarMapping( 'rev2_2_id', $this->rev2_2
->getId() +
$i * self
::$numOfRevs );
456 $asserter->setVarMapping( 'rev2_3_id', $this->rev2_3
->getId() +
$i * self
::$numOfRevs );
457 $asserter->setVarMapping( 'rev2_4_id', $this->rev2_4
->getId() +
$i * self
::$numOfRevs );
459 $asserter->setVarMapping( 'rev4_1_pageid', $this->pageId4 +
$i * self
::$numOfPages );
460 $asserter->setVarMapping( 'rev4_1_id', $this->rev4_1
->getId() +
$i * self
::$numOfRevs );
462 $asserter->setVarMapping( 'rev5_1_pageid', $this->pageId5 +
$i * self
::$numOfPages );
463 $asserter->setVarMapping( 'rev5_1_id', $this->rev5_1
->getId() +
$i * self
::$numOfRevs );
// Re-read the template, strip its test-only tags and substitute the
// mappings of this iteration.
465 $xml = file_get_contents( $templatePath );
466 $xml = $asserter->stripTestTags( $xml );
467 $xml = $asserter->resolveVars( $xml );
470 $content .= $writer->closeStream();
// file_put_contents() returns the number of bytes written, so comparing it
// with strlen( $content ) detects a short or failed write.
472 $this->assertEquals( strlen( $content ), file_put_contents(
473 $outFile, $content ), "Length of prepared stub" );
480 * Tests for TextPassDumper that do not rely on the database
482 * (As the Database group is only detected at class level (not method level), we
483 * cannot bring this test case's tests into the above main test case.)
486 * @covers \MediaWiki\Maintenance\TextPassDumper
488 class TextPassDumperDatabaselessTest
extends MediaWikiLangTestCase
{
490 * Ensures that setting the buffer size is effective.
492 * @dataProvider bufferSizeProvider
public function testBufferSizeSetting( $expected, $size, $msg ) {
	// Feeds --buffersize=<size> to the dumper through loadWithArgv() and
	// compares the resulting buffer size (read via the accessor subclass)
	// with the expected value.
	$accessor = new TextPassDumperAccessor();
	$argv = [ "--buffersize=" . $size ];
	$accessor->loadWithArgv( $argv );

	$actual = $accessor->getBufferSize();
	$this->assertEquals( $expected, $actual, $msg );
}
502 * Provides the cases for testBufferSizeSetting().
504 * Each case is [ expected size, requested size, failure message ].
506 public static function bufferSizeProvider() {
// Rows line up with the parameters of testBufferSizeSetting( $expected, $size, $msg ).
// The first two rows expect the requested size to be used unchanged; the last
// row requests 2048 but expects 4096, i.e. a 4 KiB lower bound is expected
// to be enforced.
507 // expected, bufferSize to initialize with, message
509 [ 512 * 1024, 512 * 1024, "Setting 512 KiB is not effective" ],
510 [ 8192, 8192, "Setting 8 KiB is not effective" ],
511 [ 4096, 2048, "Could set buffer size below lower bound" ]
517 * Accessor for internal state of TextPassDumper
519 * Do not add getters here without good reason.
521 class TextPassDumperAccessor
extends TextPassDumper
{
523 * Gets the bufferSize.
525 * If bufferSize setting does not work correctly, testCheckpoint... tests
526 * fail and point in the wrong direction. To aid in troubleshooting when
527 * testCheckpoint... tests break at some point in the future, we test the
528 * bufferSize setting, hence need this accessor.
530 * (Yes, bufferSize is internal state of the TextPassDumper, but aiding
531 * debugging of testCheckpoint... in the future seems to be worth testing
532 * against it nonetheless.)
public function getBufferSize() {
	// Exposes the dumper's bufferSize property so tests can assert on it.
	$current = $this->bufferSize;

	return $current;
}
540 public function dump( $history, $text = null ) {