* Fix talk page move handling
[mediawiki.git] / maintenance / compressOld.inc
blobf6a9f430351cfb182b0f1d536e5f66333b4c946d
1 <?php
2 /**
3  * @package MediaWiki
4  * @subpackage Maintenance
5  */
7 /** */
8 require_once( 'Revision.php' );
/**
 * Compress the rows of the 'old' table in place, one row at a time.
 *
 * Walks the table in old_id order in batches of 50 rows, hands every row to
 * compressPage() (which skips rows already flagged gzip) and prints the next
 * starting old_id after each batch.
 *
 * @param int $start Lowest old_id to process (default 0)
 */
function compressOldPages( $start = 0 ) {
	$fname = 'compressOldPages';

	# Force an integer: the value is interpolated into the WHERE clause below
	$start = intval( $start );
	$chunksize = 50;
	print "Starting from old_id $start...\n";
	$dbw =& wfGetDB( DB_MASTER );
	do {
		$res = $dbw->select( 'old', array( 'old_id','old_flags','old_namespace','old_title','old_text' ),
			"old_id>=$start", $fname, array( 'ORDER BY' => 'old_id', 'LIMIT' => $chunksize, 'FOR UPDATE' ) );
		if( $dbw->numRows( $res ) == 0 ) {
			# Nothing left at or above $start; release the empty result and stop
			$dbw->freeResult( $res );
			break;
		}
		$last = $start;
		while( $row = $dbw->fetchObject( $res ) ) {
			# print "  {$row->old_id} - {$row->old_namespace}:{$row->old_title}\n";
			compressPage( $row );
			$last = $row->old_id;
		}
		$dbw->freeResult( $res );
		# Resume after the highest id actually seen rather than $start + $chunksize:
		# deletion may leave long empty stretches
		$start = $last + 1;
		print "$start...\n";
	} while( true );
}
/**
 * Deflate the text of a single 'old' row and store it back with the gzip flag
 * appended to old_flags.
 *
 * @param object $row Row of the 'old' table; old_id, old_flags and old_text are read
 * @return bool True after the update has been issued; false if the row was
 *              skipped because it is already flagged gzip or could not be compressed
 */
function compressPage( $row ) {
	$fname = 'compressPage';
	if( false !== strpos( $row->old_flags, "gzip" ) ) {
		print "Already compressed row {$row->old_id}?\n";
		return false;
	}

	$compress = gzdeflate( $row->old_text );
	if( $compress === false ) {
		# gzdeflate() reports failure with false; never write that over the stored text
		print "Unable to compress row {$row->old_id}\n";
		return false;
	}

	$dbw =& wfGetDB( DB_MASTER );
	$flags = $row->old_flags ? "{$row->old_flags},gzip" : "gzip";
	$dbw->update( 'old',
		array( /* SET */
			'old_flags' => $flags,
			'old_text' => $compress
		), array( /* WHERE */
			'old_id' => $row->old_id
		), $fname, 'LIMIT 1'
	);
	return true;
}
# Ways compressWithConcat() can load revision text (it currently hard-codes LS_CHUNKED).
# LS_INDIVIDUAL: fetch each text row with its own selectRow() query.
58 define( 'LS_INDIVIDUAL', 0 );
# LS_CHUNKED: load old_flags/old_text together with the page's revision rows in one joined query.
59 define( 'LS_CHUNKED', 1 );
/**
 * Compress page histories by concatenating the text of several revisions of
 * the same page into one ConcatenatedGzipHistoryBlob.
 *
 * For every page_id from $startId up to the highest page_id, the matching
 * revisions are split into chunks. The text row of the first revision in a
 * chunk is overwritten with the serialized blob; each further row whose text
 * is at least 120 bytes long is overwritten with a serialized stub pointing at
 * that first row. Rows whose old_flags already contain 'object' are not selected.
 *
 * @param int    $startId         First page_id to process
 * @param int    $maxChunkSize    Maximum number of revisions put into one chunk
 * @param mixed  $maxChunkFactor  Passed through to the blob's isHappy() check
 * @param mixed  $factorThreshold Passed through to the blob's isHappy() check
 * @param string $beginDate       If non-empty, only revisions with rev_timestamp greater than this
 * @param string $endDate         If non-empty, only revisions with rev_timestamp less than this
 * @param bool   $exclude_ns0     If true, pages in namespace 0 are skipped (default false)
 * @param string $queryExtra      Optional raw SQL condition added to the page query (default none)
 * @return bool Always true
 */
function compressWithConcat( $startId, $maxChunkSize, $maxChunkFactor, $factorThreshold, $beginDate, $endDate,
	$exclude_ns0 = false, $queryExtra = '' )
{
	$fname = 'compressWithConcat';
	$loadStyle = LS_CHUNKED;

	$dbr =& wfGetDB( DB_SLAVE );
	$dbw =& wfGetDB( DB_MASTER );

	# Get all articles by page_id
	$maxPageId = $dbr->selectField( 'page', 'max(page_id)', '', $fname );
	$pageConds = array();

	if ( $exclude_ns0 ) {
		print "Excluding main namespace\n";
		$pageConds[] = 'page_namespace<>0';
	}
	if ( $queryExtra ) {
		$pageConds[] = $queryExtra;
	}

	# For each article, get a list of revisions which fit the criteria
	# No recompression, use a condition on old_flags
	$conds = array("old_flags NOT LIKE '%object%'");

	if ( $beginDate ) {
		$conds[] = "rev_timestamp>'" . $beginDate . "'";
	}
	if ( $endDate )  {
		$conds[] = "rev_timestamp<'" . $endDate . "'";
	}
	if ( $loadStyle == LS_CHUNKED ) {
		# Pull the text along with the revision rows in a single joined query
		$tables = array( 'revision', 'text' );
		$fields = array( 'rev_id', 'rev_text_id', 'old_flags', 'old_text' );
		$conds[] = 'rev_text_id=old_id';
		$revLoadOptions = 'FOR UPDATE';
	} else {
		# Text is fetched row by row inside the chunk loop instead
		$tables = array( 'revision' );
		$fields = array( 'rev_id', 'rev_text_id' );
		$revLoadOptions = array();
	}

	for ( $pageId = $startId; $pageId <= $maxPageId; $pageId++ ) {
		$pageRes = $dbr->select( 'page', array('page_id', 'page_namespace', 'page_title'),
			$pageConds + array('page_id' => $pageId), $fname );
		$pageRow = $dbr->fetchObject( $pageRes );
		# Release the result right away; this loop visits every page id
		$dbr->freeResult( $pageRes );
		if ( !$pageRow ) {
			# No such page, or it was filtered out by $pageConds
			continue;
		}

		# Display progress
		$titleObj = Title::makeTitle( $pageRow->page_namespace, $pageRow->page_title );
		print "$pageId\t" . $titleObj->getPrefixedDBkey() . " ";

		# Load revisions
		$revRes = $dbw->select( $tables, $fields,
			array( 'rev_page' => $pageRow->page_id ) + $conds,
			$fname,
			$revLoadOptions
		);
		$revs = array();
		while ( $revRow = $dbw->fetchObject( $revRes ) ) {
			$revs[] = $revRow;
		}
		$dbw->freeResult( $revRes );

		$numRevs = count( $revs );
		if ( $numRevs < 2 ) {
			# Fewer than two matching revisions, nothing to concatenate
			print "\n";
			continue;
		}

		# For each chunk
		$i = 0;
		while ( $i < $numRevs ) {
			$thisChunkSize = min( $maxChunkSize, $numRevs - $i );

			$chunk = new ConcatenatedGzipHistoryBlob();
			$stubs = array();
			$dbw->begin();
			$usedChunk = false;
			$primaryOldid = $revs[$i]->rev_text_id;

			# Get the text of each revision and add it to the object
			for ( $j = 0; $j < $thisChunkSize && $chunk->isHappy( $maxChunkFactor, $factorThreshold ); $j++ ) {
				$oldid = $revs[$i + $j]->rev_text_id;

				# Get text
				if ( $loadStyle == LS_INDIVIDUAL ) {
					$textRow = $dbw->selectRow( 'text',
						array( 'old_flags', 'old_text' ),
						array( 'old_id' => $oldid ),
						$fname,
						'FOR UPDATE'
					);
					$text = Revision::getRevisionText( $textRow );
				} else {
					$text = Revision::getRevisionText( $revs[$i + $j] );
				}

				if ( $text === false ) {
					print "\nError, unable to get text in old_id $oldid\n";
					#$dbw->delete( 'old', array( 'old_id' => $oldid ) );
					if ( $j == 0 ) {
						# An unreadable row must not become the head of a chunk, or it
						# would be overwritten by the blob. Leave it untouched and
						# restart chunking at the next revision.
						$j = 1;
						break;
					}
				}

				if ( $j == 0 ) {
					$chunk->setText( $text );
					print '.';
				} elseif ( $text === false || strlen( $text ) < 120 ) {
					# Don't make a stub if it's going to be longer than the article
					# Stubs are typically about 100 bytes
					$stubs[$j] = false;
					print 'x';
				} else {
					$stub = $chunk->addItem( $text );
					$stub->setLocation( $primaryOldid );
					$stubs[$j] = serialize( $stub );
					print '.';
					$usedChunk = true;
				}
			}
			# The loop may have stopped early; only the first $j revisions were handled
			$thisChunkSize = $j;

			# If we couldn't actually use any stubs because the pages were too small, do nothing
			if ( $usedChunk ) {
				# Store the main object
				$dbw->update( 'text',
					array( /* SET */
						'old_text' => serialize( $chunk ),
						'old_flags' => 'object',
					), array( /* WHERE */
						'old_id' => $primaryOldid
					), $fname
				);

				# Store the stub objects
				for ( $j = 1; $j < $thisChunkSize; $j++ ) {
					# Skip if not compressing
					if ( $stubs[$j] !== false ) {
						$dbw->update( 'text',
							array( /* SET */
								'old_text' => $stubs[$j],
								'old_flags' => 'object',
							), array( /* WHERE */
								'old_id' => $revs[$i + $j]->rev_text_id
							), $fname
						);
					}
				}
			}
			# Done, next
			print "/";
			$dbw->commit();
			# Always advance by at least one revision so the loop cannot stall if
			# the inner loop ran zero iterations
			$i += max( 1, $thisChunkSize );
		}
		print "\n";
	}
	return true;
}