3 require_once( "$IP/maintenance/backup.inc" );
4 require_once( "$IP/maintenance/backupTextPass.inc" );
7 * Tests for the text pass of dumps, as performed by TextPassDumper
12 class TextPassDumperTest
extends DumpTestCase
{
14 // We'll add several pages, revisions and texts. The following variables hold the
16 private $pageId1, $pageId2, $pageId3, $pageId4, $pageId5;
17 private $revId1_1, $textId1_1;
18 private $revId2_1, $textId2_1, $revId2_2, $textId2_2;
19 private $revId2_3, $textId2_3, $revId2_4, $textId2_4;
20 private $revId3_1, $textId3_1, $revId3_2, $textId3_2;
21 private $revId4_1, $textId4_1;
23 function addDBData() {
24 $this->tablesUsed
[] = 'page';
25 $this->tablesUsed
[] = 'revision';
26 $this->tablesUsed
[] = 'text';
30 $title = Title
::newFromText( 'BackupDumperTestP1' );
31 $page = WikiPage
::factory( $title );
32 list( $this->revId1_1
, $this->textId1_1
) = $this->addRevision( $page,
33 "BackupDumperTestP1Text1", "BackupDumperTestP1Summary1" );
34 $this->pageId1
= $page->getId();
36 // Page with more than one revision
37 $title = Title
::newFromText( 'BackupDumperTestP2' );
38 $page = WikiPage
::factory( $title );
39 list( $this->revId2_1
, $this->textId2_1
) = $this->addRevision( $page,
40 "BackupDumperTestP2Text1", "BackupDumperTestP2Summary1" );
41 list( $this->revId2_2
, $this->textId2_2
) = $this->addRevision( $page,
42 "BackupDumperTestP2Text2", "BackupDumperTestP2Summary2" );
43 list( $this->revId2_3
, $this->textId2_3
) = $this->addRevision( $page,
44 "BackupDumperTestP2Text3", "BackupDumperTestP2Summary3" );
45 list( $this->revId2_4
, $this->textId2_4
) = $this->addRevision( $page,
46 "BackupDumperTestP2Text4 some additional Text ",
47 "BackupDumperTestP2Summary4 extra " );
48 $this->pageId2
= $page->getId();
51 $title = Title
::newFromText( 'BackupDumperTestP3' );
52 $page = WikiPage
::factory( $title );
53 list( $this->revId3_1
, $this->textId3_1
) = $this->addRevision( $page,
54 "BackupDumperTestP3Text1", "BackupDumperTestP2Summary1" );
55 list( $this->revId3_2
, $this->textId3_2
) = $this->addRevision( $page,
56 "BackupDumperTestP3Text2", "BackupDumperTestP2Summary2" );
57 $this->pageId3
= $page->getId();
58 $page->doDeleteArticle( "Testing ;)" );
60 // Page from non-default namespace
61 $title = Title
::newFromText( 'BackupDumperTestP1', NS_TALK
);
62 $page = WikiPage
::factory( $title );
63 list( $this->revId4_1
, $this->textId4_1
) = $this->addRevision( $page,
64 "Talk about BackupDumperTestP1 Text1",
65 "Talk BackupDumperTestP1 Summary1" );
66 $this->pageId4
= $page->getId();
67 } catch ( Exception
$e ) {
68 // We'd love to pass $e directly. However, ... see
69 // documentation of exceptionFromAddDBData in
71 $this->exceptionFromAddDBData
= $e;
76 protected function setUp() {
79 // Since we will restrict dumping by page ranges (to allow
80 // working tests, even if the db gets prepopulated by a base
81 // class), we have to assert that the page ids are consecutively
84 array( $this->pageId2
, $this->pageId3
, $this->pageId4
),
85 array( $this->pageId1 +
1, $this->pageId2 +
1, $this->pageId3 +
1 ),
86 "Page ids increasing without holes" );
90 function testPlain() {
// Runs a TextPassDumper over a freshly generated single-iteration stub
// (setUpStub() defaults) and verifies that the resulting full dump contains
// pages 1, 2 and 4 with the expected revision ids, summaries, text ids,
// SHA-1 values and texts.
91 // Setting up the dump
92 $nameStub = $this->setUpStub();
93 $nameFull = $this->getNewTempFile();
94 $dumper = new TextPassDumper( array ( "--stub=file:" . $nameStub,
95 "--output=file:" . $nameFull ) );
96 $dumper->reporting
= false;
97 $dumper->setDb( $this->db
);
99 // Performing the dump
100 $dumper->dump( WikiExporter
::FULL
, WikiExporter
::TEXT
);
102 // Checking for correctness of the dumped data
103 $this->assertDumpStart( $nameFull );
// Page 1: main namespace, a single revision
106 $this->assertPageStart( $this->pageId1
, NS_MAIN
, "BackupDumperTestP1" );
107 $this->assertRevision( $this->revId1_1
, "BackupDumperTestP1Summary1",
108 $this->textId1_1
, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
109 "BackupDumperTestP1Text1" );
110 $this->assertPageEnd();
// Page 2: main namespace, four revisions in creation order
113 $this->assertPageStart( $this->pageId2
, NS_MAIN
, "BackupDumperTestP2" );
114 $this->assertRevision( $this->revId2_1
, "BackupDumperTestP2Summary1",
115 $this->textId2_1
, false, "jprywrymfhysqllua29tj3sc7z39dl2",
116 "BackupDumperTestP2Text1" );
117 $this->assertRevision( $this->revId2_2
, "BackupDumperTestP2Summary2",
118 $this->textId2_2
, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
119 "BackupDumperTestP2Text2" );
120 $this->assertRevision( $this->revId2_3
, "BackupDumperTestP2Summary3",
121 $this->textId2_3
, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
122 "BackupDumperTestP2Text3" );
123 $this->assertRevision( $this->revId2_4
, "BackupDumperTestP2Summary4 extra",
124 $this->textId2_4
, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
125 "BackupDumperTestP2Text4 some additional Text" );
126 $this->assertPageEnd();
// Page 3 was deleted again in addDBData() and is not part of the stub,
// so no assertions are made for it.
129 // -> Page is marked deleted. Hence not visible
// Page 4: the talk page of page 1, a single revision
132 $this->assertPageStart( $this->pageId4
, NS_TALK
, "Talk:BackupDumperTestP1" );
133 $this->assertRevision( $this->revId4_1
, "Talk BackupDumperTestP1 Summary1",
134 $this->textId4_1
, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
135 "Talk about BackupDumperTestP1 Text1" );
136 $this->assertPageEnd();
138 $this->assertDumpEnd();
141 function testPrefetchPlain() {
142 // The mapping between ids and text, for the hits of the prefetch mock
143 $prefetchMap = array(
144 array( $this->pageId1
, $this->revId1_1
, "Prefetch_________1Text1" ),
145 array( $this->pageId2
, $this->revId2_3
, "Prefetch_________2Text3" )
149 $prefetchMock = $this->getMock( 'BaseDump', array( 'prefetch' ), array(), '', FALSE );
150 $prefetchMock->expects( $this->exactly( 6 ) )
151 ->method( 'prefetch' )
152 ->will( $this->returnValueMap( $prefetchMap ) );
154 // Setting up of the dump
155 $nameStub = $this->setUpStub();
156 $nameFull = $this->getNewTempFile();
157 $dumper = new TextPassDumper( array ( "--stub=file:"
158 . $nameStub, "--output=file:" . $nameFull ) );
159 $dumper->prefetch
= $prefetchMock;
160 $dumper->reporting
= false;
161 $dumper->setDb( $this->db
);
163 // Performing the dump
164 $dumper->dump( WikiExporter
::FULL
, WikiExporter
::TEXT
);
166 // Checking for correctness of the dumped data
167 $this->assertDumpStart( $nameFull );
170 $this->assertPageStart( $this->pageId1
, NS_MAIN
, "BackupDumperTestP1" );
171 // Prefetch kicks in. This is still the SHA-1 of the original text,
172 // but the actual text (with different SHA-1) comes from prefetch.
173 $this->assertRevision( $this->revId1_1
, "BackupDumperTestP1Summary1",
174 $this->textId1_1
, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
175 "Prefetch_________1Text1" );
176 $this->assertPageEnd();
179 $this->assertPageStart( $this->pageId2
, NS_MAIN
, "BackupDumperTestP2" );
180 $this->assertRevision( $this->revId2_1
, "BackupDumperTestP2Summary1",
181 $this->textId2_1
, false, "jprywrymfhysqllua29tj3sc7z39dl2",
182 "BackupDumperTestP2Text1" );
183 $this->assertRevision( $this->revId2_2
, "BackupDumperTestP2Summary2",
184 $this->textId2_2
, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
185 "BackupDumperTestP2Text2" );
186 // Prefetch kicks in. This is still the SHA-1 of the original text,
187 // but the actual text (with different SHA-1) comes from prefetch.
188 $this->assertRevision( $this->revId2_3
, "BackupDumperTestP2Summary3",
189 $this->textId2_3
, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
190 "Prefetch_________2Text3" );
191 $this->assertRevision( $this->revId2_4
, "BackupDumperTestP2Summary4 extra",
192 $this->textId2_4
, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
193 "BackupDumperTestP2Text4 some additional Text" );
194 $this->assertPageEnd();
197 // -> Page is marked deleted. Hence not visible
200 $this->assertPageStart( $this->pageId4
, NS_TALK
, "Talk:BackupDumperTestP1" );
201 $this->assertRevision( $this->revId4_1
, "Talk BackupDumperTestP1 Summary1",
202 $this->textId4_1
, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
203 "Talk about BackupDumperTestP1 Text1" );
204 $this->assertPageEnd();
206 $this->assertDumpEnd();
211 * Ensures that checkpoint dumps are used and written, by successively increasing the
212 * stub size and dumping until the duration crosses a threshold.
214 * @param $checkpointFormat string: Either "file" for plain text or "gzip" for gzipped
217 private function checkpointHelper( $checkpointFormat = "file" ) {
218 // Getting temporary names
219 $nameStub = $this->getNewTempFile();
220 $nameOutputDir = $this->getNewTempDirectory();
222 $stderr = fopen( 'php://output', 'a' );
223 if ( $stderr === FALSE ) {
224 $this->fail( "Could not open stream for stderr" );
227 $iterations = 32; // We'll start with that many iterations of revisions in stub
229 $minDuration = 2; // We want the dump to take at least this many seconds
230 $checkpointAfter = 0.5; // Generate checkpoint after this many seconds
233 // Until a dump takes at least $minDuration seconds, perform a dump and check
234 // duration. If the dump did not take long enough increase the iteration
235 // count, to generate a bigger stub file next time.
236 while ( $lastDuration < $minDuration ) {
238 // Setting up the dump
239 wfRecursiveRemoveDir( $nameOutputDir );
240 $this->assertTrue( wfMkdirParents( $nameOutputDir ),
241 "Creating temporary output directory " );
242 $this->setUpStub( $nameStub, $iterations );
243 $dumper = new TextPassDumper( array ( "--stub=file:" . $nameStub,
244 "--output=" . $checkpointFormat . ":" . $nameOutputDir . "/full",
245 "--maxtime=1" /*This is in minutes. Fixup is below*/,
246 "--checkpointfile=checkpoint-%s-%s.xml.gz" ) );
247 $dumper->setDb( $this->db
);
248 $dumper->maxTimeAllowed
= $checkpointAfter; // Patching maxTime from 1 minute
249 $dumper->stderr
= $stderr;
251 // The actual dump and taking time
252 $ts_before = wfTime();
253 $dumper->dump( WikiExporter
::FULL
, WikiExporter
::TEXT
);
254 $ts_after = wfTime();
255 $lastDuration = $ts_after - $ts_before;
257 // Handling increasing the iteration count for the stubs
258 if ( $lastDuration < $minDuration ) {
259 $old_iterations = $iterations;
260 if ( $lastDuration > 0.2 ) {
261 // lastDuration is big enough, to allow an educated guess
262 $factor = ( $minDuration +
0.5 ) / $lastDuration;
263 if ( ( $factor > 1.1 ) && ( $factor < 100 ) ) {
264 // educated guess is reasonable
265 $iterations = (int)( $iterations * $factor );
269 if ( $old_iterations == $iterations ) {
270 // Heuristics were not applied, so we just *2.
274 $this->assertLessThan( 50000, $iterations,
275 "Emergency stop against infinitely increasing iteration "
276 . "count ( last duration: $lastDuration )" );
280 // The dump (hopefully) did take long enough to produce more than one
283 // We now check all the checkpoint files for validity.
285 $files = scandir( $nameOutputDir );
286 $this->assertTrue( asort( $files ), "Sorting files in temporary directory" );
289 $checkpointFiles = 0;
291 // Each run of the following loop body tries to handle exactly 1 /page/ (not
292 // iteration of stub content). $i is only increased after having treated page 4.
293 for ( $i = 0 ; $i < $iterations ; ) {
295 // 1. Assuring a file is opened and ready. Skipping across header if
297 if ( ! $fileOpened ) {
298 $this->assertNotEmpty( $files, "No more existing dump files, "
299 . "but not yet all pages found" );
300 $fname = array_shift( $files );
301 while ( $fname == "." ||
$fname == ".." ) {
302 $this->assertNotEmpty( $files, "No more existing dump"
303 . " files, but not yet all pages found" );
304 $fname = array_shift( $files );
306 if ( $checkpointFormat == "gzip" ) {
307 $this->gunzip( $nameOutputDir . "/" . $fname );
309 $this->assertDumpStart( $nameOutputDir . "/" . $fname );
314 // 2. Performing a single page check
315 switch ( $lookingForPage ) {
318 $this->assertPageStart( $this->pageId1 +
$i * 4, NS_MAIN
,
319 "BackupDumperTestP1" );
320 $this->assertRevision( $this->revId1_1
, "BackupDumperTestP1Summary1",
321 $this->textId1_1
, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
322 "BackupDumperTestP1Text1" );
323 $this->assertPageEnd();
330 $this->assertPageStart( $this->pageId2 +
$i * 4, NS_MAIN
,
331 "BackupDumperTestP2" );
332 $this->assertRevision( $this->revId2_1
, "BackupDumperTestP2Summary1",
333 $this->textId2_1
, false, "jprywrymfhysqllua29tj3sc7z39dl2",
334 "BackupDumperTestP2Text1" );
335 $this->assertRevision( $this->revId2_2
, "BackupDumperTestP2Summary2",
336 $this->textId2_2
, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
337 "BackupDumperTestP2Text2" );
338 $this->assertRevision( $this->revId2_3
, "BackupDumperTestP2Summary3",
339 $this->textId2_3
, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
340 "BackupDumperTestP2Text3" );
341 $this->assertRevision( $this->revId2_4
,
342 "BackupDumperTestP2Summary4 extra",
343 $this->textId2_4
, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
344 "BackupDumperTestP2Text4 some additional Text" );
345 $this->assertPageEnd();
352 $this->assertPageStart( $this->pageId4 +
$i * 4, NS_TALK
,
353 "Talk:BackupDumperTestP1" );
354 $this->assertRevision( $this->revId4_1
,
355 "Talk BackupDumperTestP1 Summary1",
356 $this->textId4_1
, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
357 "Talk about BackupDumperTestP1 Text1" );
358 $this->assertPageEnd();
362 // We dealt with the whole iteration.
367 $this->fail( "Bad setting for lookingForPage ($lookingForPage)" );
370 // 3. Checking for the end of the current checkpoint file
371 if ( $this->xml
->nodeType
== XMLReader
::END_ELEMENT
372 && $this->xml
->name
== "mediawiki" ) {
374 $this->assertDumpEnd();
379 // Assuring we completely read all files ...
380 $this->assertFalse( $fileOpened, "Currently read file still open?" );
381 $this->assertEmpty( $files, "Remaining unchecked files" );
383 // ... and have dealt with more than one checkpoint file
384 $this->assertGreaterThan( 1, $checkpointFiles, "# of checkpoint files" );
386 $this->expectETAOutput();
392 function testCheckpointPlain() {
// No argument: checkpointHelper() falls back to its default "file" format.
393 $this->checkpointHelper();
397 * Tests that checkpoint generation in gzip format works.
399 * We keep this test in addition to the simpler self::testCheckpointPlain, as there
400 * were once problems when the used sinks were DumpPipeOutputs.
402 * xmldumps-backup typically uses bzip2 instead of gzip. However, as bzip2 requires
403 * PHP extensions, we go for gzip instead, which triggers the same relevant code
404 * paths while still being testable on more systems.
408 function testCheckpointGzip() {
// Run the shared checkpoint scenario with "gzip" as the output format.
409 $this->checkpointHelper( "gzip" );
414 * Creates a stub file that is used for testing the text pass of dumps
416 * @param $fname string: (Optional) Absolute name of the file to write
417 * the stub into. If this parameter is null, a new temporary
418 * file is generated that is automatically removed upon
420 * @param $iterations integer: (Optional) specifies how often the block
421 * of 3 pages should go into the stub file. The page ids
422 * increase further and further, while the revision and text
423 * ids of the first iteration are reused. The pages of
424 * iteration > 1 have no corresponding representation in the
426 * @return string absolute filename of the stub
428 private function setUpStub( $fname = null, $iterations = 1 ) {
429 if ( $fname === null ) {
430 $fname = $this->getNewTempFile();
432 $header = '<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.6/" '
433 . 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
434 . 'xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.6/ '
435 . 'http://www.mediawiki.org/xml/export-0.6.xsd" version="0.6" xml:lang="en">
437 <sitename>wikisvn</sitename>
438 <base>http://localhost/wiki-svn/index.php/Main_Page</base>
439 <generator>MediaWiki 1.20alpha</generator>
440 <case>first-letter</case>
442 <namespace key="-2" case="first-letter">Media</namespace>
443 <namespace key="-1" case="first-letter">Special</namespace>
444 <namespace key="0" case="first-letter" />
445 <namespace key="1" case="first-letter">Talk</namespace>
446 <namespace key="2" case="first-letter">User</namespace>
447 <namespace key="3" case="first-letter">User talk</namespace>
448 <namespace key="4" case="first-letter">Wikisvn</namespace>
449 <namespace key="5" case="first-letter">Wikisvn talk</namespace>
450 <namespace key="6" case="first-letter">File</namespace>
451 <namespace key="7" case="first-letter">File talk</namespace>
452 <namespace key="8" case="first-letter">MediaWiki</namespace>
453 <namespace key="9" case="first-letter">MediaWiki talk</namespace>
454 <namespace key="10" case="first-letter">Template</namespace>
455 <namespace key="11" case="first-letter">Template talk</namespace>
456 <namespace key="12" case="first-letter">Help</namespace>
457 <namespace key="13" case="first-letter">Help talk</namespace>
458 <namespace key="14" case="first-letter">Category</namespace>
459 <namespace key="15" case="first-letter">Category talk</namespace>
463 $tail = '</mediawiki>
467 $iterations = intval( $iterations );
468 for ( $i = 0; $i < $iterations; $i++
) {
471 <title>BackupDumperTestP1</title>
473 <id>' . ( $this->pageId1 +
$i * 4 ) . '</id>
475 <id>' . $this->revId1_1
. '</id>
476 <timestamp>2012-04-01T16:46:05Z</timestamp>
480 <comment>BackupDumperTestP1Summary1</comment>
481 <text id="' . $this->textId1_1
. '" bytes="23" />
482 <sha1>0bolhl6ol7i6x0e7yq91gxgaan39j87</sha1>
487 <title>BackupDumperTestP2</title>
489 <id>' . ( $this->pageId2 +
$i * 4 ) . '</id>
491 <id>' . $this->revId2_1
. '</id>
492 <timestamp>2012-04-01T16:46:05Z</timestamp>
496 <comment>BackupDumperTestP2Summary1</comment>
497 <text id="' . $this->textId2_1
. '" bytes="23" />
498 <sha1>jprywrymfhysqllua29tj3sc7z39dl2</sha1>
501 <id>' . $this->revId2_2
. '</id>
502 <timestamp>2012-04-01T16:46:05Z</timestamp>
506 <comment>BackupDumperTestP2Summary2</comment>
507 <text id="' . $this->textId2_2
. '" bytes="23" />
508 <sha1>b7vj5ks32po5m1z1t1br4o7scdwwy95</sha1>
511 <id>' . $this->revId2_3
. '</id>
512 <timestamp>2012-04-01T16:46:05Z</timestamp>
516 <comment>BackupDumperTestP2Summary3</comment>
517 <text id="' . $this->textId2_3
. '" bytes="23" />
518 <sha1>jfunqmh1ssfb8rs43r19w98k28gg56r</sha1>
521 <id>' . $this->revId2_4
. '</id>
522 <timestamp>2012-04-01T16:46:05Z</timestamp>
526 <comment>BackupDumperTestP2Summary4 extra</comment>
527 <text id="' . $this->textId2_4
. '" bytes="44" />
528 <sha1>6o1ciaxa6pybnqprmungwofc4lv00wv</sha1>
532 // page 3 not in stub
535 <title>Talk:BackupDumperTestP1</title>
537 <id>' . ( $this->pageId4 +
$i * 4 ) . '</id>
539 <id>' . $this->revId4_1
. '</id>
540 <timestamp>2012-04-01T16:46:05Z</timestamp>
544 <comment>Talk BackupDumperTestP1 Summary1</comment>
545 <text id="' . $this->textId4_1
. '" bytes="35" />
546 <sha1>nktofwzd0tl192k3zfepmlzxoax1lpe</sha1>
550 $content .= $page1 . $page2 . $page4;
553 $this->assertEquals( strlen( $content ), file_put_contents(
554 $fname, $content ), "Length of prepared stub" );