* In ResourceLoaderContext, lazy-load $this->direction and $this->language, to avoid...
[mediawiki.git] / maintenance / backupPrefetch.inc
blob2d7b8a9e1572f006fd2bd2ab5dd0d83b018bcdeb
1 <?php
2 /**
3  * Helper class for the --prefetch option of dumpTextPass.php
4  *
5  * @file
6  * @ingroup Maintenance
7  */
9 /**
10  * Readahead helper for making large MediaWiki data dumps;
11  * reads in a previous XML dump to sequentially prefetch text
12  * records already normalized and decompressed.
13  *
14  * This can save load on the external database servers, hopefully.
15  *
16  * Assumes that dumps will be recorded in the canonical order:
17  * - ascending by page_id
18  * - ascending by rev_id within each page
19  * - text contents are immutable and should not change once
20  *   recorded, so the previous dump is a reliable source
21  *
22  * Requires the XMLReader PECL extension.
23  * @ingroup Maintenance
24  */
25 class BaseDump {
26         var $reader = null;
27         var $atEnd = false;
28         var $atPageEnd = false;
29         var $lastPage = 0;
30         var $lastRev = 0;
/**
 * Opens the previous XML dump that prefetch() will read from.
 *
 * Declared as __construct() because PHP 8 no longer treats a method
 * named after its class as the constructor; without this, `new
 * BaseDump( ... )` would leave $this->reader unset.
 *
 * @param $infile String: location of the dump, handed to XMLReader::open()
 */
function __construct( $infile ) {
        $this->reader = new XMLReader();
        if ( !$this->reader->open( $infile ) ) {
                // The source could not be opened, so there is nothing to
                // read: flag the stream as exhausted so that every
                // prefetch() call reports a miss instead of reading from
                // an unopened reader.
                $this->debug( "BaseDump: unable to open $infile" );
                $this->atEnd = true;
        }
}

/**
 * PHP 4 style constructor name, retained so that explicit calls to
 * BaseDump() (e.g. from a subclass) keep working.
 *
 * @param $infile String: see __construct()
 * @deprecated use __construct()
 */
function BaseDump( $infile ) {
        $this->__construct( $infile );
}
37         /**
38          * Attempts to fetch the text of a particular page revision
39          * from the dump stream. May return null if the page is
40          * unavailable.
41          *
42          * @param $page Integer: ID number of page to read
43          * @param $rev Integer: ID number of revision to read
44          * @return string or null
45          */
function prefetch( $page, $rev ) {
        // Compare numerically regardless of what the caller passed in.
        $page = intval( $page );
        $rev = intval( $rev );

        // Advance page by page until the wanted page is reached or passed,
        // or the dump runs out.
        while ( !$this->atEnd && $this->lastPage < $page ) {
                $this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
                $this->nextPage();
        }

        // The stream only moves forward, so a page we have already passed
        // (or an exhausted dump) is a miss.
        if ( $this->atEnd || $this->lastPage > $page ) {
                $this->debug( "BaseDump::prefetch already past page $page looking for rev $rev  [$this->lastPage, $this->lastRev]" );
                return null;
        }

        // Same forward scan, this time over the revisions of the page.
        while ( !$this->atEnd && !$this->atPageEnd && $this->lastRev < $rev ) {
                $this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, looking for $page, $rev" );
                $this->nextRev();
        }

        if ( $this->atEnd || $this->lastRev != $rev ) {
                $this->debug( "BaseDump::prefetch already past rev $rev on page $page  [$this->lastPage, $this->lastRev]" );
                return null;
        }

        $this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
        return $this->nextText();
}
/**
 * Forwards a diagnostic message to wfDebug(), terminated by a newline.
 *
 * @param $str String: message text, without trailing newline
 */
function debug( $str ) {
        // Alternative sink, currently unused: the global $dumper object's
        // progress( $str ) method.
        wfDebug( "{$str}\n" );
}
76         /**
77          * @access private
78          */
function nextPage() {
        // Look for the next <page>; the search gives up at </mediawiki>
        // (or at end of input), in which case the dump is finished.
        if ( !$this->skipTo( 'page', 'mediawiki' ) ) {
                $this->atEnd = true;
                return;
        }

        // A page without an <id> leaves the current position markers alone.
        if ( !$this->skipTo( 'id' ) ) {
                return;
        }

        // Record the new page and reset the per-page revision state.
        $pageId = $this->nodeContents();
        $this->lastPage = intval( $pageId );
        $this->lastRev = 0;
        $this->atPageEnd = false;
}
91         /**
92          * @access private
93          */
function nextRev() {
        // Moves to the next <revision> of the current page and records its
        // <id> in $this->lastRev.
        if ( $this->skipTo( 'revision' ) && $this->skipTo( 'id' ) ) {
                $this->lastRev = intval( $this->nodeContents() );
        } else {
                // skipTo() only fails once it has consumed </page> or hit the
                // end of input, so the page is finished in both cases. This
                // includes a <revision> that has no <id>: without flagging it,
                // the next call would read revisions belonging to the
                // following page while lastPage still names this one.
                $this->atPageEnd = true;
        }
}
104         /**
105          * @access private
106          */
function nextText() {
        // Returns the text of the current revision, or null when it cannot
        // be read (prefetch() passes this straight on to its caller as a miss).
        //
        // The search is bounded by </revision> so that a revision whose
        // <text> is missing or was already consumed does not hand back the
        // text of a later revision.
        // NOTE(review): assumes <text> is nested inside <revision>, as in the
        // MediaWiki XML export schema - confirm for any other dump format.
        if ( !$this->skipTo( 'text', 'revision' ) ) {
                return null;
        }

        // nodeContents() returns null when the input ends mid-element; do
        // not disguise a truncated dump as an empty revision.
        $contents = $this->nodeContents();
        if ( $contents === null ) {
                return null;
        }
        return strval( $contents );
}
112         /**
113          * @access private
114          */
function skipTo( $name, $parent = 'page' ) {
        // Reads forward until an opening <$name> is found (returns true) or
        // the closing </$parent> is met (returns false). Running out of
        // input closes the reader and returns close()'s null.
        if ( $this->atEnd ) {
                return false;
        }

        while ( $this->reader->read() ) {
                $type = $this->reader->nodeType;
                $tag = $this->reader->name;

                if ( $type == XMLReader::ELEMENT && $tag == $name ) {
                        return true;
                }

                if ( $type == XMLReader::END_ELEMENT && $tag == $parent ) {
                        $this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );
                        return false;
                }
        }

        // Input exhausted before either marker turned up.
        return $this->close();
}
133         /**
134          * Shouldn't something like this be built-in to XMLReader?
135          * Fetches text contents of the current element, assuming
136          * no sub-elements or such scary things.
137          *
138          * @return String
139          * @access private
140          */
function nodeContents() {
        // Nothing left to read.
        if ( $this->atEnd ) {
                return null;
        }

        // A self-closing element has no content and no end tag to wait for.
        if ( $this->reader->isEmptyElement ) {
                return "";
        }

        $collected = "";
        while ( $this->reader->read() ) {
                $type = $this->reader->nodeType;

                // The first closing tag ends the element (no children expected).
                if ( $type == XMLReader::END_ELEMENT ) {
                        return $collected;
                }

                // Gather text and significant whitespace; plain
                // XMLReader::WHITESPACE nodes are deliberately left out.
                if ( $type == XMLReader::TEXT || $type == XMLReader::SIGNIFICANT_WHITESPACE ) {
                        $collected .= $this->reader->value;
                }
        }

        // Input ended before the closing tag: shut down and return null.
        return $this->close();
}
163         /**
164          * @access private
165          */
function close() {
        // Mark the stream as exhausted and release the underlying reader.
        // Always returns null so callers can write `return $this->close();`.
        $this->atEnd = true;
        $this->reader->close();
        return null;
}