Follow-up r89835: Accidentally committed from a deeper dir than the diff. Committing...
[mediawiki.git] / maintenance / backupPrefetch.inc
blobd3c4e790dbd2a890fa8d70e08fbff48f27258e86
1 <?php
2 /**
3  * Helper class for the --prefetch option of dumpTextPass.php
4  *
5  * Copyright © 2005 Brion Vibber <brion@pobox.com>
6  * http://www.mediawiki.org/
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License along
19  * with this program; if not, write to the Free Software Foundation, Inc.,
20  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21  * http://www.gnu.org/copyleft/gpl.html
22  *
23  * @file
24  * @ingroup Maintenance
25  */
27 /**
28  * Readahead helper for making large MediaWiki data dumps;
29  * reads in a previous XML dump to sequentially prefetch text
30  * records already normalized and decompressed.
31  *
32  * This can save load on the external database servers, hopefully.
33  *
34  * Assumes that dumps will be recorded in the canonical order:
35  * - ascending by page_id
36  * - ascending by rev_id within each page
37  * - text contents are immutable and should not change once
38  *   recorded, so the previous dump is a reliable source
39  *
40  * @ingroup Maintenance
41  */
42 class BaseDump {
43         var $reader = null;
44         var $atEnd = false;
45         var $atPageEnd = false;
46         var $lastPage = 0;
47         var $lastRev = 0;
48         var $infiles = null;
/**
 * Opens the first dump file named in $infile and queues any others.
 *
 * Declared as __construct() because PHP 8 no longer treats a method
 * named after its class as the constructor; without this, the reader
 * would never be created when the object is instantiated.
 *
 * @param $infile String: path of the previous XML dump to read. Several
 *   paths may be given separated by ';': the first one is opened here
 *   and the remainder are kept in $this->infiles for nextPage() to open
 *   once the current file is exhausted.
 */
function __construct( $infile ) {
    $this->infiles = explode( ';', $infile );
    $this->reader = new XMLReader();
    $infile = array_shift( $this->infiles );
    $this->reader->open( $infile );
}

/**
 * PHP 4-style constructor name, kept only so that code calling it
 * explicitly (for example parent::BaseDump( $infile ) in a subclass)
 * continues to work. It simply forwards to __construct().
 *
 * @param $infile String: see __construct()
 */
function BaseDump( $infile ) {
    $this->__construct( $infile );
}
/**
 * Looks up the text of one revision in the previous dump.
 *
 * The underlying reader only moves forward: pages are advanced with
 * nextPage() until the requested page is reached or passed, then
 * revisions are advanced with nextRev() until the requested revision
 * is reached or passed. Anything that has already been passed, or
 * that is missing from the dump, yields null.
 *
 * @param $page Integer: ID number of page to read
 * @param $rev Integer: ID number of revision to read
 * @return string or null
 */
function prefetch( $page, $rev ) {
    $page = intval( $page );
    $rev = intval( $rev );

    // Step forward page by page until we reach (or overshoot) the target.
    while ( !$this->atEnd && $this->lastPage < $page ) {
        $this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
        $this->nextPage();
    }

    // Ran out of input, or the dump is already positioned beyond this page.
    if ( $this->atEnd || $this->lastPage > $page ) {
        $this->debug( "BaseDump::prefetch already past page $page looking for rev $rev  [$this->lastPage, $this->lastRev]" );
        return null;
    }

    // Same idea within the page: step forward revision by revision.
    while ( !$this->atPageEnd && !$this->atEnd && $this->lastRev < $rev ) {
        $this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, looking for $page, $rev" );
        $this->nextRev();
    }

    // No exact match: the revision is absent or was already passed.
    if ( $this->atEnd || $this->lastRev != $rev ) {
        $this->debug( "BaseDump::prefetch already past rev $rev on page $page  [$this->lastPage, $this->lastRev]" );
        return null;
    }

    $this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
    return $this->nextText();
}
/**
 * Sends a diagnostic message, terminated with a newline, to wfDebug().
 *
 * @param $str String: message text, without trailing newline
 */
function debug( $str ) {
    $line = $str . "\n";
    wfDebug( $line );
}
/**
 * Moves the reader to the next <page> element and records its ID.
 *
 * On success, lastPage takes the page's <id>, lastRev is reset to 0
 * and atPageEnd is cleared. If no further <page> is found, the reader
 * is closed and, when more input files are queued, the next one is
 * opened and atEnd is cleared; no page is read from the new file in
 * this call, so the caller is expected to call nextPage() again.
 *
 * @access private
 */
function nextPage() {
    if ( !$this->skipTo( 'page', 'mediawiki' ) ) {
        // Current file is exhausted: shut it down and try the next one.
        $this->close();
        if ( count( $this->infiles ) > 0 ) {
            $nextFile = array_shift( $this->infiles );
            $this->reader->open( $nextFile );
            $this->atEnd = false;
        }
        return;
    }

    if ( $this->skipTo( 'id' ) ) {
        $pageId = $this->nodeContents();
        $this->lastPage = intval( $pageId );
        $this->lastRev = 0;
        $this->atPageEnd = false;
    }
}
/**
 * Moves the reader to the next <revision> of the current page and
 * stores its <id> in lastRev. When no further <revision> is found,
 * atPageEnd is set instead.
 *
 * @access private
 */
function nextRev() {
    if ( !$this->skipTo( 'revision' ) ) {
        $this->atPageEnd = true;
        return;
    }

    if ( $this->skipTo( 'id' ) ) {
        $revId = $this->nodeContents();
        $this->lastRev = intval( $revId );
    }
}
/**
 * Skips ahead to the next <text> element and returns its contents
 * converted to a string (so a null from nodeContents() becomes "").
 *
 * @return String
 * @access private
 */
function nextText() {
    $this->skipTo( 'text' );
    $contents = $this->nodeContents();
    return strval( $contents );
}
/**
 * Reads forward until an opening element called $name is found.
 *
 * The search gives up when the closing tag of $parent is seen first,
 * or when the reader has no more nodes (in which case the reader is
 * closed and atEnd is set).
 *
 * @param $name String: element name to stop on
 * @param $parent String: element whose closing tag ends the search
 * @return true if positioned on <$name>; false if already at end or
 *   </$parent> was reached; null if the input ran out
 * @access private
 */
function skipTo( $name, $parent = 'page' ) {
    if ( $this->atEnd ) {
        return false;
    }

    $reader = $this->reader;
    while ( $reader->read() ) {
        $type = $reader->nodeType;
        $tag = $reader->name;

        if ( $type == XMLReader::ELEMENT && $tag == $name ) {
            return true;
        }

        if ( $type == XMLReader::END_ELEMENT && $tag == $parent ) {
            $this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );
            return false;
        }
    }

    // Nothing left to read: close() marks the end and hands back null.
    return $this->close();
}
/**
 * Collects the text contents of the element the reader is currently
 * positioned on, assuming it contains no child elements.
 *
 * Text and significant-whitespace nodes are concatenated until the
 * first closing tag is met. Nodes of type XMLReader::WHITESPACE are
 * not collected.
 *
 * @return String contents ("" for an empty element), or null when the
 *   reader is already at the end or runs out of input before a
 *   closing tag is seen
 * @access private
 */
function nodeContents() {
    if ( $this->atEnd ) {
        return null;
    }
    if ( $this->reader->isEmptyElement ) {
        return "";
    }

    $buffer = "";
    while ( $this->reader->read() ) {
        $type = $this->reader->nodeType;

        if ( $type == XMLReader::END_ELEMENT ) {
            return $buffer;
        }

        if ( $type == XMLReader::TEXT || $type == XMLReader::SIGNIFICANT_WHITESPACE ) {
            $buffer .= $this->reader->value;
        }
    }

    // Input ended inside the element: close() marks the end and returns null.
    return $this->close();
}
/**
 * Marks the dump as finished and closes the underlying XMLReader.
 *
 * @return null always, so callers can write "return $this->close();"
 * @access private
 */
function close() {
    $this->atEnd = true;
    $this->reader->close();
    return null;
}