Localisation updates from https://translatewiki.net.
[mediawiki.git] / tests / phpunit / maintenance / TextPassDumperDatabaseTest.php
blobdce01ea26266434bbe542cff7e8fd6c01ee1ed6f
1 <?php
3 namespace MediaWiki\Tests\Maintenance;
5 use BaseDump;
6 use MediaWiki\Maintenance\TextPassDumper;
7 use MediaWiki\Revision\RevisionRecord;
8 use MediaWiki\Revision\SlotRecord;
9 use MediaWikiLangTestCase;
10 use WikiExporter;
11 use XmlDumpWriter;
/**
 * Tests for TextPassDumper that rely on the database
 *
 * Some of these tests use the old constructor for TextPassDumper
 * and the dump() function, while others use the new loadWithArgv( $args )
 * function and execute(). This is to ensure both the old and new methods
 * work properly.
 *
 * @group Database
 * @group Dump
 * @covers \MediaWiki\Maintenance\TextPassDumper
 */
25 class TextPassDumperDatabaseTest extends DumpTestCase {
27 use PageDumpTestDataTrait;
/**
 * Adds the database fixtures used by the tests in this class.
 *
 * Runs the parent implementation first, then creates the test pages
 * through addTestPages() (presumably supplied by PageDumpTestDataTrait),
 * passing the test sysop's user object.
 */
public function addDBData() {
	parent::addDBData();

	$this->addTestPages( $this->getTestSysop()->getUser() );
}
/**
 * Data provider for testFullTextPlain().
 *
 * Yields one data set per entry of XmlDumpWriter::$supportedSchemas, each
 * consisting of that single schema version.
 *
 * Declared static because it reads only static state and PHPUnit 10+
 * requires data providers to be static.
 *
 * @return iterable One-element argument lists, [ $schemaVersion ]
 */
public static function schemaVersionProvider() {
	foreach ( XmlDumpWriter::$supportedSchemas as $schemaVersion ) {
		yield [ $schemaVersion ];
	}
}
/**
 * Runs a full text pass over the 'AllStubs' stub using the legacy
 * constructor/dump() entry points and compares the result against the
 * 'SiteInfo' and 'AllText' templates for the given schema version.
 *
 * @dataProvider schemaVersionProvider
 * @param string $schemaVersion XML dump schema version to write and validate against
 */
public function testFullTextPlain( $schemaVersion ) {
	// Setting up the dump
	$nameStub = $this->setUpStub( 'AllStubs', $schemaVersion );
	$nameFull = $this->getNewTempFile();

	$dumper = new TextPassDumper( [
		"--stub=file:" . $nameStub,
		"--output=file:" . $nameFull,
		'--schema-version', $schemaVersion,
	] );
	$dumper->reporting = false;
	$dumper->setDB( $this->getDb() );

	// Performing the dump
	$dumper->dump( WikiExporter::FULL, WikiExporter::TEXT );

	// Checking the dumped data
	$this->assertDumpSchema( $nameFull, $this->getXmlSchemaPath( $schemaVersion ) );

	$asserter = $this->getDumpAsserter( $schemaVersion );
	$this->setSiteVarMappings( $asserter );
	$this->setAllRevisionsVarMappings( $asserter );

	$siteInfoTemplate = $this->getDumpTemplatePath( 'SiteInfo', $schemaVersion );
	$pagesTemplate = $this->getDumpTemplatePath( 'AllText', $schemaVersion );

	$asserter->open( $nameFull );
	$asserter->assertDumpHead( $siteInfoTemplate );
	$asserter->assertDOM( $pagesTemplate );
	$asserter->assertDumpEnd();
}
/**
 * Runs a full text pass with a mocked prefetch source and checks that the
 * dumped slot texts are the ones handed out by the prefetch mock.
 *
 * Uses the legacy constructor/dump() entry points and the globally
 * configured $wgXmlDumpSchemaVersion.
 */
public function testPrefetchPlain() {
	global $wgXmlDumpSchemaVersion;

	// Test revisions, keyed by revision ID so the prefetch callback can look them up
	/** @var RevisionRecord[] $revisions */
	$revisions = [];
	$testRevisions = [
		$this->rev1_1,
		$this->rev2_1,
		$this->rev2_2,
		$this->rev2_3,
		$this->rev2_4,
		$this->rev4_1,
	];
	foreach ( $testRevisions as $testRevision ) {
		$revisions[$testRevision->getId()] = $testRevision;
	}

	$getPrefetchText = static function ( $pageid, $revid, $role ) use ( $revisions ) {
		$rev = $revisions[$revid];
		$slot = $rev->getSlot( $role );

		// NOTE: TextPassDumper does a check on the string length,
		// so we have to pad to match the original length. The hash is not checked.
		return str_pad( "Prefetch: ({$pageid}/{$revid}/$role)", $slot->getSize(), '*' );
	};

	// The mock itself
	$prefetchMock = $this->getMockBuilder( BaseDump::class )
		->onlyMethods( [ 'prefetch' ] )
		->disableOriginalConstructor()
		->getMock();
	$prefetchMock->method( 'prefetch' )
		->willReturnCallback( $getPrefetchText );

	// Setting up of the dump
	$nameStub = $this->setUpStub( 'AllStubs', $wgXmlDumpSchemaVersion );
	$nameFull = $this->getNewTempFile();

	$dumper = new TextPassDumper( [
		"--stub=file:" . $nameStub,
		"--output=file:" . $nameFull,
	] );

	$dumper->prefetch = $prefetchMock;
	$dumper->reporting = false;
	$dumper->setDB( $this->getDb() );

	// Performing the dump
	$dumper->dump( WikiExporter::FULL, WikiExporter::TEXT );

	// Checking the dumped data
	$this->assertDumpSchema( $nameFull, $this->getXmlSchemaPath( $wgXmlDumpSchemaVersion ) );

	$asserter = $this->getDumpAsserter( $wgXmlDumpSchemaVersion );
	$this->setSiteVarMappings( $asserter );
	$this->setAllRevisionsVarMappings( $asserter );

	$siteInfoTemplate = $this->getDumpTemplatePath( 'SiteInfo', $wgXmlDumpSchemaVersion );
	$pagesTemplate = $this->getDumpTemplatePath( 'AllText', $wgXmlDumpSchemaVersion );

	// Override the text variables with the prefetched texts. Each entry is
	// template variable => [ revision, slot role ]; the order of the
	// setVarMapping() calls is the same as in the previous hand-written list.
	$prefetchedSlots = [
		'rev1_1_main_text' => [ $this->rev1_1, SlotRecord::MAIN ],
		'rev1_1_aux_text' => [ $this->rev1_1, 'aux' ],
		'rev2_1_main_text' => [ $this->rev2_1, SlotRecord::MAIN ],
		'rev2_2_main_text' => [ $this->rev2_2, SlotRecord::MAIN ],
		'rev2_3_main_text' => [ $this->rev2_3, SlotRecord::MAIN ],
		'rev2_4_main_text' => [ $this->rev2_4, SlotRecord::MAIN ],
		'rev4_1_main_text' => [ $this->rev4_1, SlotRecord::MAIN ],
	];
	foreach ( $prefetchedSlots as $varName => [ $rev, $role ] ) {
		$asserter->setVarMapping(
			$varName,
			$getPrefetchText( $rev->getPageId(), $rev->getId(), $role )
		);
	}

	$asserter->open( $nameFull );
	$asserter->assertDumpHead( $siteInfoTemplate );
	$asserter->assertDOM( $pagesTemplate );
	$asserter->assertDumpEnd();
}
/**
 * Ensures that checkpoint dumps are used and written, by successively increasing the
 * stub size and dumping until the duration crosses a threshold.
 *
 * The helper first repeats the dump with ever larger stubs until one run takes
 * at least $minDuration seconds, then walks through all files in the output
 * directory and asserts their pages and revisions in order.
 *
 * @param string $checkpointFormat Either "file" for plain text or "gzip" for gzipped
 *   checkpoint files.
 */
private function checkpointHelper( $checkpointFormat = "file" ) {
	global $wgXmlDumpSchemaVersion;

	// Getting temporary names
	$nameStub = $this->getNewTempFile();
	$nameOutputDir = $this->getNewTempDirectory();

	// The dumper's "stderr" is pointed at PHP's output stream; the expected
	// output is declared at the very end via expectETAOutput().
	$stderr = fopen( 'php://output', 'a' );
	if ( $stderr === false ) {
		$this->fail( "Could not open stream for stderr" );
	}

	$iterations = 32; // We'll start with that many iterations of revisions
	// in stub. Make sure that the generated volume is above the buffer size
	// set below. Otherwise, the checkpointing does not trigger.
	$lastDuration = 0;
	$minDuration = 2; // We want the dump to take at least this many seconds
	$checkpointAfter = 0.5; // Generate checkpoint after this many seconds

	// Until a dump takes at least $minDuration seconds, perform a dump and check
	// duration. If the dump did not take long enough increase the iteration
	// count, to generate a bigger stub file next time.
	while ( $lastDuration < $minDuration ) {
		// Setting up the dump
		wfRecursiveRemoveDir( $nameOutputDir );
		$this->assertTrue( wfMkdirParents( $nameOutputDir ),
			"Creating temporary output directory " );
		$this->setUpStub( 'AllStubs', $wgXmlDumpSchemaVersion, $nameStub, $iterations );
		$dumper = new TextPassDumper();
		$dumper->loadWithArgv( [ "--stub=file:" . $nameStub,
			"--output=" . $checkpointFormat . ":" . $nameOutputDir . "/full",
			"--maxtime=1", // This is in minutes. Fixup is below
			"--buffersize=32768", // The default of 32 iterations fill up 32 KiB about twice
			"--checkpointfile=checkpoint-%s-%s.xml.gz" ] );
		$dumper->setDB( $this->getDb() );
		$dumper->maxTimeAllowed = $checkpointAfter; // Patching maxTime from 1 minute
		$dumper->stderr = $stderr;

		// The actual dump and taking time
		$ts_before = microtime( true );
		$dumper->execute();
		$ts_after = microtime( true );
		$lastDuration = $ts_after - $ts_before;

		// Handling increasing the iteration count for the stubs
		if ( $lastDuration < $minDuration ) {
			$old_iterations = $iterations;
			if ( $lastDuration > 0.2 ) {
				// lastDuration is big enough, to allow an educated guess
				$factor = ( $minDuration + 0.5 ) / $lastDuration;
				if ( ( $factor > 1.1 ) && ( $factor < 100 ) ) {
					// educated guess is reasonable
					$iterations = (int)( $iterations * $factor );
				}
			}

			// Both operands are always ints here, so a strict comparison is safe
			if ( $old_iterations === $iterations ) {
				// Heuristics were not applied, so we just *2.
				$iterations *= 2;
			}

			$this->assertLessThan( 50000, $iterations,
				"Emergency stop against infinitely increasing iteration "
				. "count ( last duration: $lastDuration )" );
		}
	}

	// The dump (hopefully) did take long enough to produce more than one
	// checkpoint file.
	// We now check all the checkpoint files for validity.

	$files = scandir( $nameOutputDir );
	$this->assertTrue( asort( $files ), "Sorting files in temporary directory" );
	$fileOpened = false;
	$lookingForPage = 1;
	$checkpointFiles = 0;

	$asserter = $this->getDumpAsserter();

	// Each run of the following loop body tries to handle exactly 1 /page/ (not
	// iteration of stub content). $i is only increased after having treated page 4.
	for ( $i = 0; $i < $iterations; ) {
		// 1. Assuring a file is opened and ready. Skipping across header if
		// necessary.
		if ( !$fileOpened ) {
			$this->assertNotEmpty( $files, "No more existing dump files, "
				. "but not yet all pages found" );
			$fname = array_shift( $files );
			// Skip the directory self/parent entries returned by scandir()
			while ( $fname === "." || $fname === ".." ) {
				$this->assertNotEmpty( $files, "No more existing dump"
					. " files, but not yet all pages found" );
				$fname = array_shift( $files );
			}
			if ( $checkpointFormat === "gzip" ) {
				$this->gunzip( $nameOutputDir . "/" . $fname );
			}
			$asserter->open( $nameOutputDir . "/" . $fname );
			$asserter->assertDumpHead();
			$fileOpened = true;
			$checkpointFiles++;
		}

		// 2. Performing a single page check
		switch ( $lookingForPage ) {
			case 1:
				// Page 1
				$asserter->assertPageStart(
					$this->pageId1 + $i * self::$numOfPages,
					NS_MAIN,
					$this->pageTitle1->getPrefixedText()
				);
				$asserter->assertRevision(
					$this->rev1_1->getId() + $i * self::$numOfRevs,
					$this->rev1_1->getComment()->text,
					$this->getSlotTextId( $this->rev1_1->getSlot( SlotRecord::MAIN ) ),
					false,
					$this->rev1_1->getSha1(),
					$this->getSlotText( $this->rev1_1->getSlot( SlotRecord::MAIN ) )
				);
				$asserter->assertPageEnd();

				$lookingForPage = 2;
				break;

			case 2:
				// Page 2
				$asserter->assertPageStart(
					$this->pageId2 + $i * self::$numOfPages,
					NS_MAIN,
					$this->pageTitle2->getPrefixedText()
				);
				$asserter->assertRevision(
					$this->rev2_1->getId() + $i * self::$numOfRevs,
					$this->rev2_1->getComment()->text,
					$this->getSlotTextId( $this->rev2_1->getSlot( SlotRecord::MAIN ) ),
					false,
					$this->rev2_1->getSha1(),
					$this->getSlotText( $this->rev2_1->getSlot( SlotRecord::MAIN ) )
				);
				$asserter->assertRevision(
					$this->rev2_2->getId() + $i * self::$numOfRevs,
					$this->rev2_2->getComment()->text,
					$this->getSlotTextId( $this->rev2_2->getSlot( SlotRecord::MAIN ) ),
					false,
					$this->rev2_2->getSha1(),
					$this->getSlotText( $this->rev2_2->getSlot( SlotRecord::MAIN ) ),
					$this->rev2_1->getId() + $i * self::$numOfRevs
				);
				$asserter->assertRevision(
					$this->rev2_3->getId() + $i * self::$numOfRevs,
					$this->rev2_3->getComment()->text,
					$this->getSlotTextId( $this->rev2_3->getSlot( SlotRecord::MAIN ) ),
					false,
					$this->rev2_3->getSha1(),
					$this->getSlotText( $this->rev2_3->getSlot( SlotRecord::MAIN ) ),
					$this->rev2_2->getId() + $i * self::$numOfRevs
				);
				$asserter->assertRevision(
					$this->rev2_4->getId() + $i * self::$numOfRevs,
					$this->rev2_4->getComment()->text,
					$this->getSlotTextId( $this->rev2_4->getSlot( SlotRecord::MAIN ) ),
					false,
					$this->rev2_4->getSha1(),
					$this->getSlotText( $this->rev2_4->getSlot( SlotRecord::MAIN ) ),
					$this->rev2_3->getId() + $i * self::$numOfRevs
				);
				$asserter->assertPageEnd();

				$lookingForPage = 4;
				break;

			case 4:
				// Page 4
				$asserter->assertPageStart(
					$this->pageId4 + $i * self::$numOfPages,
					NS_TALK,
					$this->pageTitle4->getPrefixedText()
				);
				$asserter->assertRevision(
					$this->rev4_1->getId() + $i * self::$numOfRevs,
					$this->rev4_1->getComment()->text,
					$this->getSlotTextId( $this->rev4_1->getSlot( SlotRecord::MAIN ) ),
					false,
					$this->rev4_1->getSha1(),
					$this->getSlotText( $this->rev4_1->getSlot( SlotRecord::MAIN ) ),
					false,
					"BackupTextPassTestModel",
					"text/plain"
				);
				$asserter->assertPageEnd();

				$lookingForPage = 1;

				// We dealt with the whole iteration.
				$i++;
				break;

			default:
				$this->fail( "Bad setting for lookingForPage ($lookingForPage)" );
		}

		// NOTE(review): nothing guards this end-of-dump check, so every page is
		// expected to be the last one in its checkpoint file. Confirm that this
		// is intended (both callers are marked "@group Broken").
		$asserter->assertDumpEnd();
		$fileOpened = false;
	}

	// Assuring we completely read all files ...
	$this->assertFalse( $fileOpened, "Currently read file still open?" );
	$this->assertSame( [], $files, "Remaining unchecked files" );

	// ... and have dealt with more than one checkpoint file
	$this->assertGreaterThan(
		1,
		$checkpointFiles,
		"expected more than 1 checkpoint to have been created. "
		. "Checkpoint interval is $checkpointAfter seconds, maybe your computer is too fast?"
	);

	$this->expectETAOutput();
}
/**
 * Runs the checkpoint test with the helper's default checkpoint format
 * ("file", i.e. plain text).
 *
 * Broken per T70653.
 *
 * @group large
 * @group Broken
 */
public function testCheckpointPlain() {
	$this->checkpointHelper();
}
/**
 * Tests that checkpoint generation in gzip format works.
 *
 * We keep this test in addition to the simpler self::testCheckpointPlain, as there
 * were once problems when the used sinks were DumpPipeOutputs.
 *
 * xmldumps-backup typically uses bzip2 instead of gzip. However, as bzip2 requires
 * PHP extensions, we go for gzip instead, which triggers the same relevant code
 * paths while still being testable on more systems.
 *
 * Broken per T70653.
 *
 * @group large
 * @group Broken
 */
public function testCheckpointGzip() {
	$this->checkHasGzip();
	$this->checkpointHelper( "gzip" );
}
/**
 * Creates a stub file that is used for testing the text pass of dumps
 *
 * @param string $templateName
 * @param string $schemaVersion
 * @param string|null $outFile Absolute name of the file to write
 *   the stub into. If this parameter is null, a new temporary
 *   file is obtained from getNewTempFile().
 * @param int $iterations (Optional) specifies how often the block
 *   of template pages should go into the stub file. The page and
 *   revision id increase further and further, while the text
 *   id of the first iteration is reused. The pages and revision
 *   of iteration > 1 have no corresponding representation in the database.
 *
 * @return string Absolute filename of the stub
 */
private function setUpStub( $templateName, $schemaVersion, $outFile = null, $iterations = 1 ) {
	$outFile ??= $this->getNewTempFile();

	$templatePath = $this->getDumpTemplatePath( $templateName, $schemaVersion );

	// The template file is the same for every iteration, so read it only once
	// and fail early with a clear message if it cannot be read.
	$template = file_get_contents( $templatePath );
	$this->assertNotFalse( $template, "Reading stub template" );

	$asserter = $this->getDumpAsserter( $schemaVersion );
	$this->setAllRevisionsVarMappings( $asserter );

	// Make revision point to a non-existent address, to test refreshing
	// content address
	$asserter->setVarMapping( 'rev4_1_main_location', 'tt:11111111' );

	// Template variable => [ value in the first iteration, increment per iteration ].
	// The entries are applied in exactly this order on every iteration.
	$shiftedVars = [
		'rev1_1_pageid' => [ $this->pageId1, self::$numOfPages ],
		'rev1_1_id' => [ $this->rev1_1->getId(), self::$numOfRevs ],

		'rev2_1_pageid' => [ $this->pageId2, self::$numOfPages ],
		'rev2_1_id' => [ $this->rev2_1->getId(), self::$numOfRevs ],
		'rev2_2_id' => [ $this->rev2_2->getId(), self::$numOfRevs ],
		'rev2_3_id' => [ $this->rev2_3->getId(), self::$numOfRevs ],
		'rev2_4_id' => [ $this->rev2_4->getId(), self::$numOfRevs ],

		'rev4_1_pageid' => [ $this->pageId4, self::$numOfPages ],
		'rev4_1_id' => [ $this->rev4_1->getId(), self::$numOfRevs ],

		'rev5_1_pageid' => [ $this->pageId5, self::$numOfPages ],
		'rev5_1_id' => [ $this->rev5_1->getId(), self::$numOfRevs ],
	];

	$writer = new XmlDumpWriter( XmlDumpWriter::WRITE_STUB, $schemaVersion );
	$content = $writer->openStream();

	for ( $i = 0; $i < $iterations; $i++ ) {
		foreach ( $shiftedVars as $varName => [ $base, $step ] ) {
			$asserter->setVarMapping( $varName, $base + $i * $step );
		}

		$xml = $asserter->stripTestTags( $template );
		$xml = $asserter->resolveVars( $xml );
		$content .= $xml;
	}

	$content .= $writer->closeStream();

	// assertSame, so that a failed write (false) can never compare equal to a length
	$this->assertSame( strlen( $content ), file_put_contents(
		$outFile, $content ), "Length of prepared stub" );

	return $outFile;
}
/**
 * Tests for TextPassDumper that do not rely on the database
 *
 * (As the Database group is only detected at class level (not method level), we
 * cannot bring this test case's tests into the above main test case.)
 *
 * @group Dump
 * @covers \MediaWiki\Maintenance\TextPassDumper
 */
class TextPassDumperDatabaselessTest extends MediaWikiLangTestCase {
	/**
	 * Ensures that setting the buffer size is effective.
	 *
	 * @dataProvider bufferSizeProvider
	 * @param int $expected Buffer size the dumper should report afterwards
	 * @param int $size Value passed via --buffersize
	 * @param string $msg Assertion failure message
	 */
	public function testBufferSizeSetting( $expected, $size, $msg ) {
		$dumper = new TextPassDumperAccessor();
		$dumper->loadWithArgv( [ "--buffersize=" . $size ] );
		$dumper->execute();
		// NOTE(review): loose comparison kept on purpose; the value is passed
		// as part of a command-line string, so the stored type may not be int.
		// Confirm before tightening this to assertSame().
		$this->assertEquals( $expected, $dumper->getBufferSize(), $msg );
	}

	/**
	 * Data sets for testBufferSizeSetting().
	 *
	 * @return array[] List of [ expected buffer size, requested buffer size, message ]
	 */
	public static function bufferSizeProvider() {
		// expected, bufferSize to initialize with, message
		return [
			[ 512 * 1024, 512 * 1024, "Setting 512 KiB is not effective" ],
			[ 8192, 8192, "Setting 8 KiB is not effective" ],
			[ 4096, 2048, "Could set buffer size below lower bound" ]
		];
	}
}
/**
 * Accessor for internal state of TextPassDumper
 *
 * Do not add getters here without good reason.
 */
class TextPassDumperAccessor extends TextPassDumper {
	/**
	 * Gets the bufferSize.
	 *
	 * If bufferSize setting does not work correctly, testCheckpoint... tests
	 * fail and point in the wrong direction. To aid in troubleshooting when
	 * testCheckpoint... tests break at some point in the future, we test the
	 * bufferSize setting, hence need this accessor.
	 *
	 * (Yes, bufferSize is internal state of the TextPassDumper, but aiding
	 * debugging of testCheckpoint... in the future seems to be worth testing
	 * against it nonetheless.)
	 *
	 * @return int
	 */
	public function getBufferSize() {
		return $this->bufferSize;
	}

	/**
	 * No-op replacement for the parent's dump().
	 *
	 * NOTE(review): presumably this exists so that calling execute() in
	 * testBufferSizeSetting() only processes the options without performing
	 * an actual dump. Confirm against TextPassDumper::execute().
	 *
	 * @param mixed $history Ignored
	 * @param mixed|null $text Ignored
	 * @return bool Always true
	 */
	public function dump( $history, $text = null ) {
		return true;
	}
}