3 require_once __DIR__
. "/../../../maintenance/backupTextPass.inc";
6 * Tests for page dumps of BackupDumper
11 class TextPassDumperTest
extends DumpTestCase
{
// We'll add several pages, revisions and texts. The following properties hold
// the ids of the page, revision and text rows created in addDBData().
// Naming: $revIdP_R / $textIdP_R belong to revision R of page P.
private $pageId1, $pageId2, $pageId3, $pageId4;
// Page id stride used when the stub content is repeated (see setUpStub()).
private static $numOfPages = 4;
private $revId1_1, $textId1_1;
private $revId2_1, $textId2_1, $revId2_2, $textId2_2;
private $revId2_3, $textId2_3, $revId2_4, $textId2_4;
private $revId3_1, $textId3_1, $revId3_2, $textId3_2;
private $revId4_1, $textId4_1;
// Revision id stride used when the stub content is repeated: 1 + 4 + 2 + 1
// revisions are created across the four pages.
private static $numOfRevs = 8;
/**
 * Adds the pages, revisions and texts that the dump tests rely on.
 *
 * Four pages are created and the ids of every page, revision and text row
 * are stored in the corresponding properties:
 *  - page 1: a single revision,
 *  - page 2: four revisions,
 *  - page 3: two revisions, deleted again right after creation,
 *  - page 4: a single revision in the talk namespace.
 *
 * Exceptions are not propagated; they are stored in
 * $this->exceptionFromAddDBData instead.
 */
public function addDBData() {
	$this->tablesUsed[] = 'page';
	$this->tablesUsed[] = 'revision';
	$this->tablesUsed[] = 'text';

	$ns = $this->getDefaultWikitextNS();

	try {
		// Simple page
		$title = Title::newFromText( 'BackupDumperTestP1', $ns );
		$page = WikiPage::factory( $title );
		list( $this->revId1_1, $this->textId1_1 ) = $this->addRevision( $page,
			"BackupDumperTestP1Text1", "BackupDumperTestP1Summary1" );
		$this->pageId1 = $page->getId();

		// Page with more than one revision
		$title = Title::newFromText( 'BackupDumperTestP2', $ns );
		$page = WikiPage::factory( $title );
		list( $this->revId2_1, $this->textId2_1 ) = $this->addRevision( $page,
			"BackupDumperTestP2Text1", "BackupDumperTestP2Summary1" );
		list( $this->revId2_2, $this->textId2_2 ) = $this->addRevision( $page,
			"BackupDumperTestP2Text2", "BackupDumperTestP2Summary2" );
		list( $this->revId2_3, $this->textId2_3 ) = $this->addRevision( $page,
			"BackupDumperTestP2Text3", "BackupDumperTestP2Summary3" );
		list( $this->revId2_4, $this->textId2_4 ) = $this->addRevision( $page,
			"BackupDumperTestP2Text4 some additional Text ",
			"BackupDumperTestP2Summary4 extra " );
		$this->pageId2 = $page->getId();

		// Deleted page
		$title = Title::newFromText( 'BackupDumperTestP3', $ns );
		$page = WikiPage::factory( $title );
		list( $this->revId3_1, $this->textId3_1 ) = $this->addRevision( $page,
			"BackupDumperTestP3Text1", "BackupDumperTestP2Summary1" );
		list( $this->revId3_2, $this->textId3_2 ) = $this->addRevision( $page,
			"BackupDumperTestP3Text2", "BackupDumperTestP2Summary2" );
		$this->pageId3 = $page->getId();
		$page->doDeleteArticle( "Testing ;)" );

		// Page from non-default namespace
		if ( $ns === NS_TALK ) {
			// @todo work around this.
			throw new MWException( "The default wikitext namespace is the talk namespace. "
				. " We can't currently deal with that." );
		}

		$title = Title::newFromText( 'BackupDumperTestP1', NS_TALK );
		$page = WikiPage::factory( $title );
		list( $this->revId4_1, $this->textId4_1 ) = $this->addRevision( $page,
			"Talk about BackupDumperTestP1 Text1",
			"Talk BackupDumperTestP1 Summary1" );
		$this->pageId4 = $page->getId();
	} catch ( Exception $e ) {
		// We'd love to pass $e directly. However, ... see the
		// documentation of exceptionFromAddDBData (presumably declared
		// in the DumpTestCase base class -- TODO confirm).
		$this->exceptionFromAddDBData = $e;
	}
}
/**
 * Verifies the precondition shared by all tests of this class: the four
 * pages created in addDBData() received consecutive page ids.
 */
protected function setUp() {
	// NOTE(review): restored call; a statement was missing right after the
	// signature in the reviewed listing -- confirm against the repository.
	parent::setUp();

	// Since we will restrict dumping by page ranges (to allow
	// working tests, even if the db gets prepopulated by a base
	// class), we have to assert, that the page id are consecutively
	// increasing
	$this->assertEquals(
		array( $this->pageId2, $this->pageId3, $this->pageId4 ),
		array( $this->pageId1 + 1, $this->pageId2 + 1, $this->pageId3 + 1 ),
		"Page ids increasing without holes" );
}
/**
 * Runs a full text pass over a single-iteration stub without prefetch and
 * checks that every revision in the output carries the text that was
 * stored for it in addDBData().
 */
public function testPlain() {
	// Setting up the dump
	$nameStub = $this->setUpStub();
	$nameFull = $this->getNewTempFile();
	$dumper = new TextPassDumper( array( "--stub=file:" . $nameStub,
		"--output=file:" . $nameFull ) );
	$dumper->reporting = false;
	$dumper->setDb( $this->db );

	// Performing the dump
	$dumper->dump( WikiExporter::FULL, WikiExporter::TEXT );

	// Checking for correctness of the dumped data
	$this->assertDumpStart( $nameFull );

	// Page 1
	$this->assertPageStart( $this->pageId1, NS_MAIN, "BackupDumperTestP1" );
	$this->assertRevision( $this->revId1_1, "BackupDumperTestP1Summary1",
		$this->textId1_1, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
		"BackupDumperTestP1Text1" );
	$this->assertPageEnd();

	// Page 2
	$this->assertPageStart( $this->pageId2, NS_MAIN, "BackupDumperTestP2" );
	$this->assertRevision( $this->revId2_1, "BackupDumperTestP2Summary1",
		$this->textId2_1, false, "jprywrymfhysqllua29tj3sc7z39dl2",
		"BackupDumperTestP2Text1" );
	$this->assertRevision( $this->revId2_2, "BackupDumperTestP2Summary2",
		$this->textId2_2, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
		"BackupDumperTestP2Text2", $this->revId2_1 );
	$this->assertRevision( $this->revId2_3, "BackupDumperTestP2Summary3",
		$this->textId2_3, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
		"BackupDumperTestP2Text3", $this->revId2_2 );
	$this->assertRevision( $this->revId2_4, "BackupDumperTestP2Summary4 extra",
		$this->textId2_4, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
		"BackupDumperTestP2Text4 some additional Text", $this->revId2_3 );
	$this->assertPageEnd();

	// Page 3
	// -> Page is marked deleted. Hence not visible

	// Page 4
	$this->assertPageStart( $this->pageId4, NS_TALK, "Talk:BackupDumperTestP1" );
	$this->assertRevision( $this->revId4_1, "Talk BackupDumperTestP1 Summary1",
		$this->textId4_1, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
		"Talk about BackupDumperTestP1 Text1" );
	$this->assertPageEnd();

	$this->assertDumpEnd();
}
/**
 * Runs a full text pass with a mocked prefetch source and checks that
 * revisions the prefetch knows about get the prefetched text, while all
 * other revisions keep the text stored in addDBData().
 */
public function testPrefetchPlain() {
	// The mapping between ids and text, for the hits of the prefetch mock
	$prefetchMap = array(
		array( $this->pageId1, $this->revId1_1, "Prefetch_________1Text1" ),
		array( $this->pageId2, $this->revId2_3, "Prefetch_________2Text3" )
	);

	// The mock itself. prefetch() has to be consulted once for each of the
	// six revisions contained in the stub (1 + 4 + 1).
	$prefetchMock = $this->getMock( 'BaseDump', array( 'prefetch' ), array(), '', false );
	$prefetchMock->expects( $this->exactly( 6 ) )
		->method( 'prefetch' )
		->will( $this->returnValueMap( $prefetchMap ) );

	// Setting up of the dump
	$nameStub = $this->setUpStub();
	$nameFull = $this->getNewTempFile();
	$dumper = new TextPassDumper( array( "--stub=file:"
		. $nameStub, "--output=file:" . $nameFull ) );
	$dumper->prefetch = $prefetchMock;
	$dumper->reporting = false;
	$dumper->setDb( $this->db );

	// Performing the dump
	$dumper->dump( WikiExporter::FULL, WikiExporter::TEXT );

	// Checking for correctness of the dumped data
	$this->assertDumpStart( $nameFull );

	// Page 1
	$this->assertPageStart( $this->pageId1, NS_MAIN, "BackupDumperTestP1" );
	// Prefetch kicks in. This is still the SHA-1 of the original text,
	// But the actual text (with different SHA-1) comes from prefetch.
	$this->assertRevision( $this->revId1_1, "BackupDumperTestP1Summary1",
		$this->textId1_1, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
		"Prefetch_________1Text1" );
	$this->assertPageEnd();

	// Page 2
	$this->assertPageStart( $this->pageId2, NS_MAIN, "BackupDumperTestP2" );
	$this->assertRevision( $this->revId2_1, "BackupDumperTestP2Summary1",
		$this->textId2_1, false, "jprywrymfhysqllua29tj3sc7z39dl2",
		"BackupDumperTestP2Text1" );
	$this->assertRevision( $this->revId2_2, "BackupDumperTestP2Summary2",
		$this->textId2_2, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
		"BackupDumperTestP2Text2", $this->revId2_1 );
	// Prefetch kicks in. This is still the SHA-1 of the original text,
	// But the actual text (with different SHA-1) comes from prefetch.
	$this->assertRevision( $this->revId2_3, "BackupDumperTestP2Summary3",
		$this->textId2_3, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
		"Prefetch_________2Text3", $this->revId2_2 );
	$this->assertRevision( $this->revId2_4, "BackupDumperTestP2Summary4 extra",
		$this->textId2_4, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
		"BackupDumperTestP2Text4 some additional Text", $this->revId2_3 );
	$this->assertPageEnd();

	// Page 3
	// -> Page is marked deleted. Hence not visible

	// Page 4
	$this->assertPageStart( $this->pageId4, NS_TALK, "Talk:BackupDumperTestP1" );
	$this->assertRevision( $this->revId4_1, "Talk BackupDumperTestP1 Summary1",
		$this->textId4_1, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
		"Talk about BackupDumperTestP1 Text1" );
	$this->assertPageEnd();

	$this->assertDumpEnd();
}
/**
 * Ensures that checkpoint dumps are used and written, by successively increasing the
 * stub size and dumping until the duration crosses a threshold.
 *
 * The helper works in two phases:
 *  1. Dump a stub with $iterations repetitions of the test pages. As long as
 *     the dump finishes in less than $minDuration seconds, enlarge the stub
 *     and dump again.
 *  2. Walk through all files of the output directory in sorted order and
 *     check that, taken together, they hold pages 1, 2 and 4 of every
 *     iteration, and that more than one checkpoint file was written.
 *
 * @param $checkpointFormat string: Either "file" for plain text or "gzip" for gzipped
 *           checkpoint files.
 */
private function checkpointHelper( $checkpointFormat = "file" ) {
	// Getting temporary names
	$nameStub = $this->getNewTempFile();
	$nameOutputDir = $this->getNewTempDirectory();

	// The dumper's "stderr" is pointed at PHP's output stream.
	$stderr = fopen( 'php://output', 'a' );
	if ( $stderr === false ) {
		$this->fail( "Could not open stream for stderr" );
	}

	$iterations = 32; // We'll start with that many iterations of revisions in stub
	// NOTE(review): restored initialisation; the variable is read by the loop
	// condition below before anything assigns it.
	$lastDuration = 0;
	$minDuration = 2; // We want the dump to take at least this many seconds
	$checkpointAfter = 0.5; // Generate checkpoint after this many seconds

	// Until a dump takes at least $minDuration seconds, perform a dump and check
	// duration. If the dump did not take long enough increase the iteration
	// count, to generate a bigger stub file next time.
	while ( $lastDuration < $minDuration ) {

		// Setting up the dump
		wfRecursiveRemoveDir( $nameOutputDir );
		$this->assertTrue( wfMkdirParents( $nameOutputDir ),
			"Creating temporary output directory " );
		$this->setUpStub( $nameStub, $iterations );
		$dumper = new TextPassDumper( array( "--stub=file:" . $nameStub,
			"--output=" . $checkpointFormat . ":" . $nameOutputDir . "/full",
			"--maxtime=1" /*This is in minutes. Fixup is below*/,
			"--checkpointfile=checkpoint-%s-%s.xml.gz" ) );
		$dumper->setDb( $this->db );
		$dumper->maxTimeAllowed = $checkpointAfter; // Patching maxTime from 1 minute
		$dumper->stderr = $stderr;

		// The actual dump and taking time
		$ts_before = microtime( true );
		$dumper->dump( WikiExporter::FULL, WikiExporter::TEXT );
		$ts_after = microtime( true );
		$lastDuration = $ts_after - $ts_before;

		// Handling increasing the iteration count for the stubs
		if ( $lastDuration < $minDuration ) {
			$old_iterations = $iterations;
			if ( $lastDuration > 0.2 ) {
				// lastDuration is big enough, to allow an educated guess
				$factor = ( $minDuration + 0.5 ) / $lastDuration;
				if ( ( $factor > 1.1 ) && ( $factor < 100 ) ) {
					// educated guess is reasonable
					$iterations = (int)( $iterations * $factor );
				}
			}

			if ( $old_iterations === $iterations ) {
				// Heuristics were not applied, so we just *2.
				$iterations *= 2;
			}

			$this->assertLessThan( 50000, $iterations,
				"Emergency stop against infinitely increasing iteration "
					. "count ( last duration: $lastDuration )" );
		}
	}

	// The dump (hopefully) did take long enough to produce more than one
	// checkpoint file.
	//
	// We now check all the checkpoint files for validity.

	$files = scandir( $nameOutputDir );
	$this->assertTrue( asort( $files ), "Sorting files in temporary directory" );
	// NOTE(review): the two initialisations below are restored; both
	// variables are read inside the loop before anything assigns them.
	$fileOpened = false;
	$lookingForPage = 1;
	$checkpointFiles = 0;

	// Each run of the following loop body tries to handle exactly 1 /page/ (not
	// iteration of stub content). $i is only increased after having treated page 4.
	for ( $i = 0; $i < $iterations; ) {

		// 1. Assuring a file is opened and ready. Skipping across header if
		//    necessary.
		if ( !$fileOpened ) {
			$this->assertNotEmpty( $files, "No more existing dump files, "
				. "but not yet all pages found" );
			$fname = array_shift( $files );
			while ( $fname === "." || $fname === ".." ) {
				$this->assertNotEmpty( $files, "No more existing dump"
					. " files, but not yet all pages found" );
				$fname = array_shift( $files );
			}
			if ( $checkpointFormat === "gzip" ) {
				$this->gunzip( $nameOutputDir . "/" . $fname );
			}
			$this->assertDumpStart( $nameOutputDir . "/" . $fname );
			$fileOpened = true;
			$checkpointFiles++;
		}

		// 2. Performing a single page check
		// NOTE(review): case labels and the $lookingForPage transitions are
		// restored. Page 3 is deleted and not part of the stub, hence the
		// cycle 1 -> 2 -> 4 -> 1.
		switch ( $lookingForPage ) {
			case 1:
				// Page 1
				$this->assertPageStart( $this->pageId1 + $i * self::$numOfPages, NS_MAIN,
					"BackupDumperTestP1" );
				$this->assertRevision( $this->revId1_1 + $i * self::$numOfRevs, "BackupDumperTestP1Summary1",
					$this->textId1_1, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
					"BackupDumperTestP1Text1" );
				$this->assertPageEnd();

				$lookingForPage = 2;
				break;

			case 2:
				// Page 2
				$this->assertPageStart( $this->pageId2 + $i * self::$numOfPages, NS_MAIN,
					"BackupDumperTestP2" );
				$this->assertRevision( $this->revId2_1 + $i * self::$numOfRevs, "BackupDumperTestP2Summary1",
					$this->textId2_1, false, "jprywrymfhysqllua29tj3sc7z39dl2",
					"BackupDumperTestP2Text1" );
				$this->assertRevision( $this->revId2_2 + $i * self::$numOfRevs, "BackupDumperTestP2Summary2",
					$this->textId2_2, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
					"BackupDumperTestP2Text2", $this->revId2_1 + $i * self::$numOfRevs );
				$this->assertRevision( $this->revId2_3 + $i * self::$numOfRevs, "BackupDumperTestP2Summary3",
					$this->textId2_3, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
					"BackupDumperTestP2Text3", $this->revId2_2 + $i * self::$numOfRevs );
				$this->assertRevision( $this->revId2_4 + $i * self::$numOfRevs,
					"BackupDumperTestP2Summary4 extra",
					$this->textId2_4, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
					"BackupDumperTestP2Text4 some additional Text",
					$this->revId2_3 + $i * self::$numOfRevs );
				$this->assertPageEnd();

				$lookingForPage = 4;
				break;

			case 4:
				// Page 4
				$this->assertPageStart( $this->pageId4 + $i * self::$numOfPages, NS_TALK,
					"Talk:BackupDumperTestP1" );
				$this->assertRevision( $this->revId4_1 + $i * self::$numOfRevs,
					"Talk BackupDumperTestP1 Summary1",
					$this->textId4_1, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
					"Talk about BackupDumperTestP1 Text1" );
				$this->assertPageEnd();

				$lookingForPage = 1;

				// We dealt with the whole iteration.
				$i++;
				break;

			default:
				$this->fail( "Bad setting for lookingForPage ($lookingForPage)" );
		}

		// 3. Checking for the end of the current checkpoint file
		if ( $this->xml->nodeType == XMLReader::END_ELEMENT
			&& $this->xml->name == "mediawiki"
		) {
			$this->assertDumpEnd();
			$fileOpened = false;
		}
	}

	// Assuring we completely read all files ...
	$this->assertFalse( $fileOpened, "Currently read file still open?" );
	$this->assertEmpty( $files, "Remaining unchecked files" );

	// ... and have dealt with more than one checkpoint file
	$this->assertGreaterThan( 1, $checkpointFiles,
		"expected more than 1 checkpoint to have been created. "
			. "Checkpoint interval is $checkpointAfter seconds, maybe your computer is too fast?" );

	$this->expectETAOutput();
}
/**
 * Tests that checkpoint generation works for plain text ("file") output.
 *
 * NOTE(review): checkpointHelper() keeps dumping until a dump took at least
 * two seconds -- confirm whether this test needs a "@group large" annotation
 * so that it is not cut off by a timeout.
 */
public function testCheckpointPlain() {
	$this->checkpointHelper();
}
/**
 * Tests that checkpoint generation works for gzip output.
 *
 * We keep this test in addition to the simpler self::testCheckpointPlain, as there
 * were once problems when the used sinks were DumpPipeOutputs.
 *
 * xmldumps-backup typically uses bzip2 instead of gzip. However, as bzip2 requires
 * PHP extensions, we go for gzip instead, which triggers the same relevant code
 * paths while still being testable on more systems.
 *
 * NOTE(review): checkpointHelper() keeps dumping until a dump took at least
 * two seconds -- confirm whether this test needs a "@group large" annotation
 * so that it is not cut off by a timeout.
 */
public function testCheckpointGzip() {
	// Presumably skips the test when gzip is unavailable -- TODO confirm.
	$this->checkHasGzip();
	$this->checkpointHelper( "gzip" );
}
/**
 * Creates a stub file that is used for testing the text pass of dumps
 *
 * @param $fname string: (Optional) Absolute name of the file to write
 *           the stub into. If this parameter is null, a new temporary
 *           file is obtained from getNewTempFile().
 * @param $iterations integer: (Optional) specifies how often the block
 *           of 3 pages should go into the stub file. The page and
 *           revision id increase further and further, while the text
 *           id of the first iteration is reused. The pages and revision
 *           of iteration > 1 have no corresponding representation in the
 *           database.
 * @return string absolute filename of the stub
 */
private function setUpStub( $fname = null, $iterations = 1 ) {
	if ( $fname === null ) {
		$fname = $this->getNewTempFile();
	}

	// NOTE(review): the wrapper elements of the XML below (<siteinfo>,
	// <namespaces>, <page>, <ns>, <revision>, <contributor>) and the
	// statements closing the string literals were missing from the reviewed
	// listing and are restored here -- confirm against the repository.
	$header = '<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.7/" '
		. 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
		. 'xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.7/ '
		. 'http://www.mediawiki.org/xml/export-0.7.xsd" version="0.7" xml:lang="en">
  <siteinfo>
    <sitename>wikisvn</sitename>
    <base>http://localhost/wiki-svn/index.php/Main_Page</base>
    <generator>MediaWiki 1.21alpha</generator>
    <case>first-letter</case>
    <namespaces>
      <namespace key="-2" case="first-letter">Media</namespace>
      <namespace key="-1" case="first-letter">Special</namespace>
      <namespace key="0" case="first-letter" />
      <namespace key="1" case="first-letter">Talk</namespace>
      <namespace key="2" case="first-letter">User</namespace>
      <namespace key="3" case="first-letter">User talk</namespace>
      <namespace key="4" case="first-letter">Wikisvn</namespace>
      <namespace key="5" case="first-letter">Wikisvn talk</namespace>
      <namespace key="6" case="first-letter">File</namespace>
      <namespace key="7" case="first-letter">File talk</namespace>
      <namespace key="8" case="first-letter">MediaWiki</namespace>
      <namespace key="9" case="first-letter">MediaWiki talk</namespace>
      <namespace key="10" case="first-letter">Template</namespace>
      <namespace key="11" case="first-letter">Template talk</namespace>
      <namespace key="12" case="first-letter">Help</namespace>
      <namespace key="13" case="first-letter">Help talk</namespace>
      <namespace key="14" case="first-letter">Category</namespace>
      <namespace key="15" case="first-letter">Category talk</namespace>
    </namespaces>
  </siteinfo>
';
	$tail = '</mediawiki>
';

	$content = $header;
	$iterations = intval( $iterations );
	for ( $i = 0; $i < $iterations; $i++ ) {

		// Page and revision ids are shifted by a fixed stride per iteration;
		// the text ids always refer to the rows of the first iteration.
		$page1 = '  <page>
    <title>BackupDumperTestP1</title>
    <ns>0</ns>
    <id>' . ( $this->pageId1 + $i * self::$numOfPages ) . '</id>
    <revision>
      <id>' . ( $this->revId1_1 + $i * self::$numOfRevs ) . '</id>
      <timestamp>2012-04-01T16:46:05Z</timestamp>
      <contributor>
        <ip>127.0.0.1</ip>
      </contributor>
      <comment>BackupDumperTestP1Summary1</comment>
      <sha1>0bolhl6ol7i6x0e7yq91gxgaan39j87</sha1>
      <model>wikitext</model>
      <format>text/x-wiki</format>
      <text id="' . $this->textId1_1 . '" bytes="23" />
    </revision>
  </page>
';
		$page2 = '  <page>
    <title>BackupDumperTestP2</title>
    <ns>0</ns>
    <id>' . ( $this->pageId2 + $i * self::$numOfPages ) . '</id>
    <revision>
      <id>' . ( $this->revId2_1 + $i * self::$numOfRevs ) . '</id>
      <timestamp>2012-04-01T16:46:05Z</timestamp>
      <contributor>
        <ip>127.0.0.1</ip>
      </contributor>
      <comment>BackupDumperTestP2Summary1</comment>
      <sha1>jprywrymfhysqllua29tj3sc7z39dl2</sha1>
      <model>wikitext</model>
      <format>text/x-wiki</format>
      <text id="' . $this->textId2_1 . '" bytes="23" />
    </revision>
    <revision>
      <id>' . ( $this->revId2_2 + $i * self::$numOfRevs ) . '</id>
      <parentid>' . ( $this->revId2_1 + $i * self::$numOfRevs ) . '</parentid>
      <timestamp>2012-04-01T16:46:05Z</timestamp>
      <contributor>
        <ip>127.0.0.1</ip>
      </contributor>
      <comment>BackupDumperTestP2Summary2</comment>
      <sha1>b7vj5ks32po5m1z1t1br4o7scdwwy95</sha1>
      <model>wikitext</model>
      <format>text/x-wiki</format>
      <text id="' . $this->textId2_2 . '" bytes="23" />
    </revision>
    <revision>
      <id>' . ( $this->revId2_3 + $i * self::$numOfRevs ) . '</id>
      <parentid>' . ( $this->revId2_2 + $i * self::$numOfRevs ) . '</parentid>
      <timestamp>2012-04-01T16:46:05Z</timestamp>
      <contributor>
        <ip>127.0.0.1</ip>
      </contributor>
      <comment>BackupDumperTestP2Summary3</comment>
      <sha1>jfunqmh1ssfb8rs43r19w98k28gg56r</sha1>
      <model>wikitext</model>
      <format>text/x-wiki</format>
      <text id="' . $this->textId2_3 . '" bytes="23" />
    </revision>
    <revision>
      <id>' . ( $this->revId2_4 + $i * self::$numOfRevs ) . '</id>
      <parentid>' . ( $this->revId2_3 + $i * self::$numOfRevs ) . '</parentid>
      <timestamp>2012-04-01T16:46:05Z</timestamp>
      <contributor>
        <ip>127.0.0.1</ip>
      </contributor>
      <comment>BackupDumperTestP2Summary4 extra</comment>
      <sha1>6o1ciaxa6pybnqprmungwofc4lv00wv</sha1>
      <model>wikitext</model>
      <format>text/x-wiki</format>
      <text id="' . $this->textId2_4 . '" bytes="44" />
    </revision>
  </page>
';
		// page 3 not in stub

		$page4 = '  <page>
    <title>Talk:BackupDumperTestP1</title>
    <ns>1</ns>
    <id>' . ( $this->pageId4 + $i * self::$numOfPages ) . '</id>
    <revision>
      <id>' . ( $this->revId4_1 + $i * self::$numOfRevs ) . '</id>
      <timestamp>2012-04-01T16:46:05Z</timestamp>
      <contributor>
        <ip>127.0.0.1</ip>
      </contributor>
      <comment>Talk BackupDumperTestP1 Summary1</comment>
      <sha1>nktofwzd0tl192k3zfepmlzxoax1lpe</sha1>
      <model>wikitext</model>
      <format>text/x-wiki</format>
      <text id="' . $this->textId4_1 . '" bytes="35" />
    </revision>
  </page>
';
		$content .= $page1 . $page2 . $page4;
	}
	$content .= $tail;

	// file_put_contents() returns the number of bytes written, so this also
	// fails when the stub could not be written completely.
	$this->assertEquals( strlen( $content ), file_put_contents(
		$fname, $content ), "Length of prepared stub" );

	return $fname;
}