3 require_once __DIR__
. "/../../../maintenance/backupTextPass.inc";
6 * Tests for page dumps of BackupDumper
11 class TextPassDumperTest
extends DumpTestCase
{
13 // We'll add several pages, revision and texts. The following variables hold the
15 private $pageId1, $pageId2, $pageId3, $pageId4;
16 private static $numOfPages = 4;
17 private $revId1_1, $textId1_1;
18 private $revId2_1, $textId2_1, $revId2_2, $textId2_2;
19 private $revId2_3, $textId2_3, $revId2_4, $textId2_4;
20 private $revId3_1, $textId3_1, $revId3_2, $textId3_2;
21 private $revId4_1, $textId4_1;
22 private static $numOfRevs = 8;
function addDBData() {
	// Populates the database with the fixture pages of this test class and
	// records the resulting page, revision and text ids in member variables.

	// Append the names of the three tables written below to $this->tablesUsed.
	foreach ( array( 'page', 'revision', 'text' ) as $touchedTable ) {
		$this->tablesUsed[] = $touchedTable;
	}

	$wikitextNs = $this->getDefaultWikitextNS();

	try {
		// Page 1: a single revision
		$pageOne = WikiPage::factory(
			Title::newFromText( 'BackupDumperTestP1', $wikitextNs ) );
		list( $this->revId1_1, $this->textId1_1 ) = $this->addRevision(
			$pageOne, "BackupDumperTestP1Text1", "BackupDumperTestP1Summary1" );
		$this->pageId1 = $pageOne->getId();

		// Page 2: four revisions
		$pageTwo = WikiPage::factory(
			Title::newFromText( 'BackupDumperTestP2', $wikitextNs ) );
		list( $this->revId2_1, $this->textId2_1 ) = $this->addRevision(
			$pageTwo, "BackupDumperTestP2Text1", "BackupDumperTestP2Summary1" );
		list( $this->revId2_2, $this->textId2_2 ) = $this->addRevision(
			$pageTwo, "BackupDumperTestP2Text2", "BackupDumperTestP2Summary2" );
		list( $this->revId2_3, $this->textId2_3 ) = $this->addRevision(
			$pageTwo, "BackupDumperTestP2Text3", "BackupDumperTestP2Summary3" );
		list( $this->revId2_4, $this->textId2_4 ) = $this->addRevision(
			$pageTwo,
			"BackupDumperTestP2Text4 some additional Text ",
			"BackupDumperTestP2Summary4 extra " );
		$this->pageId2 = $pageTwo->getId();

		// Page 3: two revisions, then the page gets deleted again.
		// NOTE(review): the summaries below say "P2" although this is page 3;
		// looks like a copy-paste leftover. Kept as is -- confirm before changing.
		$pageThree = WikiPage::factory(
			Title::newFromText( 'BackupDumperTestP3', $wikitextNs ) );
		list( $this->revId3_1, $this->textId3_1 ) = $this->addRevision(
			$pageThree, "BackupDumperTestP3Text1", "BackupDumperTestP2Summary1" );
		list( $this->revId3_2, $this->textId3_2 ) = $this->addRevision(
			$pageThree, "BackupDumperTestP3Text2", "BackupDumperTestP2Summary2" );
		$this->pageId3 = $pageThree->getId();
		$pageThree->doDeleteArticle( "Testing ;)" );

		// Page 4: lives in the talk namespace, which therefore must differ
		// from the default wikitext namespace used for pages 1 to 3.
		if ( $wikitextNs === NS_TALK ) {
			//@todo: work around this.
			throw new MWException( "The default wikitext namespace is the talk namespace. "
				. " We can't currently deal with that." );
		}

		$pageFour = WikiPage::factory(
			Title::newFromText( 'BackupDumperTestP1', NS_TALK ) );
		list( $this->revId4_1, $this->textId4_1 ) = $this->addRevision(
			$pageFour,
			"Talk about BackupDumperTestP1 Text1",
			"Talk BackupDumperTestP1 Summary1" );
		$this->pageId4 = $pageFour->getId();
	} catch ( Exception $caught ) {
		// The exception is not rethrown from here; it is only stored. See the
		// documentation of exceptionFromAddDBData (declared outside this
		// file's visible part) for the reasoning.
		$this->exceptionFromAddDBData = $caught;
	}
}
86 protected function setUp() {
// Compares ( pageId2, pageId3, pageId4 ) against ( pageId1 + 1, pageId2 + 1,
// pageId3 + 1 ), i.e. the four fixture page ids must be consecutive.
// NOTE(review): the line holding the assertion call itself is not visible in
// this view; presumably an equality assertion -- confirm.
89 // Since we will restrict dumping by page ranges (to allow
90 // working tests, even if the db gets prepopulated by a base
91 // class), we have to assert that the page ids are consecutively
94 array( $this->pageId2
, $this->pageId3
, $this->pageId4
),
95 array( $this->pageId1 +
1, $this->pageId2 +
1, $this->pageId3 +
1 ),
96 "Page ids increasing without holes" );
function testPlain() {
	// Dumps the default stub without prefetch and checks that every revision
	// comes out with the text that was stored for it.

	// Setting up the dump
	$nameStub = $this->setUpStub();
	$nameFull = $this->getNewTempFile();
	$dumper = new TextPassDumper( array( "--stub=file:" . $nameStub,
		"--output=file:" . $nameFull ) );
	$dumper->reporting = false;
	$dumper->setDb( $this->db );

	// Performing the dump
	$dumper->dump( WikiExporter::FULL, WikiExporter::TEXT );

	// Checking for correctness of the dumped data
	$this->assertTextPassDump( $nameFull,
		"BackupDumperTestP1Text1", "BackupDumperTestP2Text3" );
}

function testPrefetchPlain() {
	// Same dump as testPlain(), but with a mocked prefetch source that
	// supplies the text for two of the six revisions.

	// The mapping between ids and text, for the hits of the prefetch mock
	$prefetchMap = array(
		array( $this->pageId1, $this->revId1_1, "Prefetch_________1Text1" ),
		array( $this->pageId2, $this->revId2_3, "Prefetch_________2Text3" )
	);

	// The mock itself. prefetch() is expected exactly 6 times, which matches
	// the number of revisions in one iteration of the stub.
	$prefetchMock = $this->getMock( 'BaseDump', array( 'prefetch' ), array(), '', false );
	$prefetchMock->expects( $this->exactly( 6 ) )
		->method( 'prefetch' )
		->will( $this->returnValueMap( $prefetchMap ) );

	// Setting up of the dump
	$nameStub = $this->setUpStub();
	$nameFull = $this->getNewTempFile();
	$dumper = new TextPassDumper( array( "--stub=file:"
		. $nameStub, "--output=file:" . $nameFull ) );
	$dumper->prefetch = $prefetchMock;
	$dumper->reporting = false;
	$dumper->setDb( $this->db );

	// Performing the dump
	$dumper->dump( WikiExporter::FULL, WikiExporter::TEXT );

	// Checking for correctness of the dumped data. For the two prefetch
	// hits, the dump still carries the SHA-1 of the original text, but the
	// actual text (with different SHA-1) comes from prefetch.
	$this->assertTextPassDump( $nameFull,
		"Prefetch_________1Text1", "Prefetch_________2Text3" );
}

/**
 * Asserts that the given dump file holds exactly the fixture pages 1, 2
 * and 4 with all their revisions, framed by dump start and dump end.
 *
 * Shared by testPlain() and testPrefetchPlain(), whose expectations differ
 * only in the text of two revisions.
 *
 * @param $nameFull string: name of the dump file to check
 * @param $textP1Rev1 string: expected text of the only revision of page 1
 * @param $textP2Rev3 string: expected text of the third revision of page 2
 */
private function assertTextPassDump( $nameFull, $textP1Rev1, $textP2Rev3 ) {
	$this->assertDumpStart( $nameFull );

	// Page 1
	$this->assertPageStart( $this->pageId1, NS_MAIN, "BackupDumperTestP1" );
	$this->assertRevision( $this->revId1_1, "BackupDumperTestP1Summary1",
		$this->textId1_1, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
		$textP1Rev1 );
	$this->assertPageEnd();

	// Page 2
	$this->assertPageStart( $this->pageId2, NS_MAIN, "BackupDumperTestP2" );
	$this->assertRevision( $this->revId2_1, "BackupDumperTestP2Summary1",
		$this->textId2_1, false, "jprywrymfhysqllua29tj3sc7z39dl2",
		"BackupDumperTestP2Text1" );
	$this->assertRevision( $this->revId2_2, "BackupDumperTestP2Summary2",
		$this->textId2_2, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
		"BackupDumperTestP2Text2", $this->revId2_1 );
	$this->assertRevision( $this->revId2_3, "BackupDumperTestP2Summary3",
		$this->textId2_3, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
		$textP2Rev3, $this->revId2_2 );
	$this->assertRevision( $this->revId2_4, "BackupDumperTestP2Summary4 extra",
		$this->textId2_4, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
		"BackupDumperTestP2Text4 some additional Text", $this->revId2_3 );
	$this->assertPageEnd();

	// Page 3
	// -> Page is marked deleted. Hence not visible

	// Page 4
	$this->assertPageStart( $this->pageId4, NS_TALK, "Talk:BackupDumperTestP1" );
	$this->assertRevision( $this->revId4_1, "Talk BackupDumperTestP1 Summary1",
		$this->textId4_1, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
		"Talk about BackupDumperTestP1 Text1" );
	$this->assertPageEnd();

	$this->assertDumpEnd();
}
221 * Ensures that checkpoint dumps are used and written, by successively increasing the
222 * stub size and dumping until the duration crosses a threshold.
224 * @param $checkpointFormat string: Either "file" for plain text or "gzip" for gzipped
227 private function checkpointHelper( $checkpointFormat = "file" ) {
// Dumps a stub of growing size, with a short checkpoint interval, until a single
// dump takes at least $minDuration seconds. Afterwards the files found in the
// output directory are walked through and the pages in them are checked.
// NOTE(review): several statements of this method are not visible in this view
// (e.g. the initialisation of $lastDuration, $fileOpened and $lookingForPage,
// the case labels of the switch and most closing braces).
228 // Getting temporary names
229 $nameStub = $this->getNewTempFile();
230 $nameOutputDir = $this->getNewTempDirectory();
// The handle called $stderr is opened on php://output and handed to the dumper
// further down. NOTE(review): presumably this is what lets expectETAOutput() at
// the end observe the dumper's progress output -- confirm.
232 $stderr = fopen( 'php://output', 'a' );
233 if ( $stderr === false ) {
234 $this->fail( "Could not open stream for stderr" );
237 $iterations = 32; // We'll start with that many iterations of revisions in stub
239 $minDuration = 2; // We want the dump to take at least this many seconds
240 $checkpointAfter = 0.5; // Generate checkpoint after this many seconds
243 // Until a dump takes at least $minDuration seconds, perform a dump and check
244 // duration. If the dump did not take long enough increase the iteration
245 // count, to generate a bigger stub file next time.
246 while ( $lastDuration < $minDuration ) {
248 // Setting up the dump
249 wfRecursiveRemoveDir( $nameOutputDir );
250 $this->assertTrue( wfMkdirParents( $nameOutputDir ),
251 "Creating temporary output directory " );
252 $this->setUpStub( $nameStub, $iterations );
253 $dumper = new TextPassDumper( array( "--stub=file:" . $nameStub,
254 "--output=" . $checkpointFormat . ":" . $nameOutputDir . "/full",
255 "--maxtime=1" /*This is in minutes. Fixup is below*/,
256 "--checkpointfile=checkpoint-%s-%s.xml.gz" ) );
257 $dumper->setDb( $this->db
);
258 $dumper->maxTimeAllowed
= $checkpointAfter; // Patching maxTime from 1 minute
259 $dumper->stderr
= $stderr;
261 // The actual dump and taking time
262 $ts_before = microtime( true );
263 $dumper->dump( WikiExporter
::FULL
, WikiExporter
::TEXT
);
264 $ts_after = microtime( true );
265 $lastDuration = $ts_after - $ts_before;
267 // Handling increasing the iteration count for the stubs
268 if ( $lastDuration < $minDuration ) {
269 $old_iterations = $iterations;
270 if ( $lastDuration > 0.2 ) {
271 // lastDuration is big enough, to allow an educated guess
// The factor aims at $minDuration plus half a second of slack.
272 $factor = ( $minDuration +
0.5 ) / $lastDuration;
273 if ( ( $factor > 1.1 ) && ( $factor < 100 ) ) {
274 // educated guess is reasonable
275 $iterations = (int)( $iterations * $factor );
279 if ( $old_iterations == $iterations ) {
280 // Heuristics were not applied, so we just *2.
284 $this->assertLessThan( 50000, $iterations,
285 "Emergency stop against infinitely increasing iteration "
286 . "count ( last duration: $lastDuration )" );
290 // The dump (hopefully) did take long enough to produce more than one
293 // We now check all the checkpoint files for validity.
295 $files = scandir( $nameOutputDir );
296 $this->assertTrue( asort( $files ), "Sorting files in temporary directory" );
299 $checkpointFiles = 0;
301 // Each run of the following loop body tries to handle exactly 1 /page/ (not
302 // iteration of stub content). $i is only increased after having treated page 4.
303 for ( $i = 0; $i < $iterations; ) {
305 // 1. Assuring a file is opened and ready. Skipping across header if
307 if ( !$fileOpened ) {
308 $this->assertNotEmpty( $files, "No more existing dump files, "
309 . "but not yet all pages found" );
310 $fname = array_shift( $files );
// Skip the "." and ".." entries that scandir() reports.
311 while ( $fname == "." ||
$fname == ".." ) {
312 $this->assertNotEmpty( $files, "No more existing dump"
313 . " files, but not yet all pages found" );
314 $fname = array_shift( $files );
316 if ( $checkpointFormat == "gzip" ) {
317 $this->gunzip( $nameOutputDir . "/" . $fname );
319 $this->assertDumpStart( $nameOutputDir . "/" . $fname );
324 // 2. Performing a single page check
// Page and revision ids of stub iteration $i are offset by $i times
// self::$numOfPages / self::$numOfRevs; the text ids are not offset.
325 switch ( $lookingForPage ) {
328 $this->assertPageStart( $this->pageId1 +
$i * self
::$numOfPages, NS_MAIN
,
329 "BackupDumperTestP1" );
330 $this->assertRevision( $this->revId1_1 +
$i * self
::$numOfRevs, "BackupDumperTestP1Summary1",
331 $this->textId1_1
, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
332 "BackupDumperTestP1Text1" );
333 $this->assertPageEnd();
340 $this->assertPageStart( $this->pageId2 +
$i * self
::$numOfPages, NS_MAIN
,
341 "BackupDumperTestP2" );
342 $this->assertRevision( $this->revId2_1 +
$i * self
::$numOfRevs, "BackupDumperTestP2Summary1",
343 $this->textId2_1
, false, "jprywrymfhysqllua29tj3sc7z39dl2",
344 "BackupDumperTestP2Text1" );
345 $this->assertRevision( $this->revId2_2 +
$i * self
::$numOfRevs, "BackupDumperTestP2Summary2",
346 $this->textId2_2
, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
347 "BackupDumperTestP2Text2", $this->revId2_1 +
$i * self
::$numOfRevs );
348 $this->assertRevision( $this->revId2_3 +
$i * self
::$numOfRevs, "BackupDumperTestP2Summary3",
349 $this->textId2_3
, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
350 "BackupDumperTestP2Text3", $this->revId2_2 +
$i * self
::$numOfRevs );
351 $this->assertRevision( $this->revId2_4 +
$i * self
::$numOfRevs,
352 "BackupDumperTestP2Summary4 extra",
353 $this->textId2_4
, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
354 "BackupDumperTestP2Text4 some additional Text",
355 $this->revId2_3 +
$i * self
::$numOfRevs );
356 $this->assertPageEnd();
363 $this->assertPageStart( $this->pageId4 +
$i * self
::$numOfPages, NS_TALK
,
364 "Talk:BackupDumperTestP1" );
365 $this->assertRevision( $this->revId4_1 +
$i * self
::$numOfRevs,
366 "Talk BackupDumperTestP1 Summary1",
367 $this->textId4_1
, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
368 "Talk about BackupDumperTestP1 Text1" );
369 $this->assertPageEnd();
373 // We dealt with the whole iteration.
378 $this->fail( "Bad setting for lookingForPage ($lookingForPage)" );
381 // 3. Checking for the end of the current checkpoint file
382 if ( $this->xml
->nodeType
== XMLReader
::END_ELEMENT
383 && $this->xml
->name
== "mediawiki"
385 $this->assertDumpEnd();
390 // Assuring we completely read all files ...
391 $this->assertFalse( $fileOpened, "Currently read file still open?" );
392 $this->assertEmpty( $files, "Remaining unchecked files" );
394 // ... and have dealt with more than one checkpoint file
395 $this->assertGreaterThan( 1, $checkpointFiles, "expected more than 1 checkpoint to have been created. Checkpoint interval is $checkpointAfter seconds, maybe your computer is too fast?" );
397 $this->expectETAOutput();
function testCheckpointPlain() {
	// "file" is the default format of checkpointHelper(); it is spelled out
	// here to make the contrast with the gzip variant explicit.
	$this->checkpointHelper( "file" );
}
408 * Tests that checkpoint generation works in gzip format.
410 * We keep this test in addition to the simpler self::testCheckpointPlain, as there
411 * were once problems when the used sinks were DumpPipeOutputs.
413 * xmldumps-backup typically uses bzip2 instead of gzip. However, as bzip2 requires
414 * PHP extensions, we go for gzip instead, which triggers the same relevant code
415 * paths while still being testable on more systems.
function testCheckpointGzip() {
	$format = "gzip";

	// NOTE(review): checkHasGzip() is defined outside the visible part of
	// this file; presumably it skips the test when gzip is unavailable.
	$this->checkHasGzip();

	// Same checks as the plain variant, but with gzipped output files.
	$this->checkpointHelper( $format );
}
426 * Creates a stub file that is used for testing the text pass of dumps
428 * @param $fname string: (Optional) Absolute name of the file to write
429 * the stub into. If this parameter is null, a new temporary
430 * file is generated that is automatically removed upon
432 * @param $iterations integer: (Optional) specifies how often the block
433 * of 3 pages should go into the stub file. The page and
434 * revision id increase further and further, while the text
435 * id of the first iteration is reused. The pages and revision
436 * of iteration > 1 have no corresponding representation in the
438 * @return string absolute filename of the stub
440 private function setUpStub( $fname = null, $iterations = 1 ) {
441 if ( $fname === null ) {
442 $fname = $this->getNewTempFile();
444 $header = '<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.7/" '
445 . 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
446 . 'xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.7/ '
447 . 'http://www.mediawiki.org/xml/export-0.7.xsd" version="0.7" xml:lang="en">
449 <sitename>wikisvn</sitename>
450 <base>http://localhost/wiki-svn/index.php/Main_Page</base>
451 <generator>MediaWiki 1.21alpha</generator>
452 <case>first-letter</case>
454 <namespace key="-2" case="first-letter">Media</namespace>
455 <namespace key="-1" case="first-letter">Special</namespace>
456 <namespace key="0" case="first-letter" />
457 <namespace key="1" case="first-letter">Talk</namespace>
458 <namespace key="2" case="first-letter">User</namespace>
459 <namespace key="3" case="first-letter">User talk</namespace>
460 <namespace key="4" case="first-letter">Wikisvn</namespace>
461 <namespace key="5" case="first-letter">Wikisvn talk</namespace>
462 <namespace key="6" case="first-letter">File</namespace>
463 <namespace key="7" case="first-letter">File talk</namespace>
464 <namespace key="8" case="first-letter">MediaWiki</namespace>
465 <namespace key="9" case="first-letter">MediaWiki talk</namespace>
466 <namespace key="10" case="first-letter">Template</namespace>
467 <namespace key="11" case="first-letter">Template talk</namespace>
468 <namespace key="12" case="first-letter">Help</namespace>
469 <namespace key="13" case="first-letter">Help talk</namespace>
470 <namespace key="14" case="first-letter">Category</namespace>
471 <namespace key="15" case="first-letter">Category talk</namespace>
475 $tail = '</mediawiki>
479 $iterations = intval( $iterations );
480 for ( $i = 0; $i < $iterations; $i++
) {
483 <title>BackupDumperTestP1</title>
485 <id>' . ( $this->pageId1 +
$i * self
::$numOfPages ) . '</id>
487 <id>' . ( $this->revId1_1 +
$i * self
::$numOfRevs ) . '</id>
488 <timestamp>2012-04-01T16:46:05Z</timestamp>
492 <comment>BackupDumperTestP1Summary1</comment>
493 <sha1>0bolhl6ol7i6x0e7yq91gxgaan39j87</sha1>
494 <model>wikitext</model>
495 <format>text/x-wiki</format>
496 <text id="' . $this->textId1_1
. '" bytes="23" />
501 <title>BackupDumperTestP2</title>
503 <id>' . ( $this->pageId2 +
$i * self
::$numOfPages ) . '</id>
505 <id>' . ( $this->revId2_1 +
$i * self
::$numOfRevs ) . '</id>
506 <timestamp>2012-04-01T16:46:05Z</timestamp>
510 <comment>BackupDumperTestP2Summary1</comment>
511 <sha1>jprywrymfhysqllua29tj3sc7z39dl2</sha1>
512 <model>wikitext</model>
513 <format>text/x-wiki</format>
514 <text id="' . $this->textId2_1
. '" bytes="23" />
517 <id>' . ( $this->revId2_2 +
$i * self
::$numOfRevs ) . '</id>
518 <parentid>' . ( $this->revId2_1 +
$i * self
::$numOfRevs ) . '</parentid>
519 <timestamp>2012-04-01T16:46:05Z</timestamp>
523 <comment>BackupDumperTestP2Summary2</comment>
524 <sha1>b7vj5ks32po5m1z1t1br4o7scdwwy95</sha1>
525 <model>wikitext</model>
526 <format>text/x-wiki</format>
527 <text id="' . $this->textId2_2
. '" bytes="23" />
530 <id>' . ( $this->revId2_3 +
$i * self
::$numOfRevs ) . '</id>
531 <parentid>' . ( $this->revId2_2 +
$i * self
::$numOfRevs ) . '</parentid>
532 <timestamp>2012-04-01T16:46:05Z</timestamp>
536 <comment>BackupDumperTestP2Summary3</comment>
537 <sha1>jfunqmh1ssfb8rs43r19w98k28gg56r</sha1>
538 <model>wikitext</model>
539 <format>text/x-wiki</format>
540 <text id="' . $this->textId2_3
. '" bytes="23" />
543 <id>' . ( $this->revId2_4 +
$i * self
::$numOfRevs ) . '</id>
544 <parentid>' . ( $this->revId2_3 +
$i * self
::$numOfRevs ) . '</parentid>
545 <timestamp>2012-04-01T16:46:05Z</timestamp>
549 <comment>BackupDumperTestP2Summary4 extra</comment>
550 <sha1>6o1ciaxa6pybnqprmungwofc4lv00wv</sha1>
551 <model>wikitext</model>
552 <format>text/x-wiki</format>
553 <text id="' . $this->textId2_4
. '" bytes="44" />
557 // page 3 not in stub
560 <title>Talk:BackupDumperTestP1</title>
562 <id>' . ( $this->pageId4 +
$i * self
::$numOfPages ) . '</id>
564 <id>' . ( $this->revId4_1 +
$i * self
::$numOfRevs ) . '</id>
565 <timestamp>2012-04-01T16:46:05Z</timestamp>
569 <comment>Talk BackupDumperTestP1 Summary1</comment>
570 <sha1>nktofwzd0tl192k3zfepmlzxoax1lpe</sha1>
571 <model>wikitext</model>
572 <format>text/x-wiki</format>
573 <text id="' . $this->textId4_1
. '" bytes="35" />
577 $content .= $page1 . $page2 . $page4;
580 $this->assertEquals( strlen( $content ), file_put_contents(
581 $fname, $content ), "Length of prepared stub" );