Merge "docs: Fix typo"
[mediawiki.git] / maintenance / storage / moveToExternal.php
blobe5371f65faf94a3ee95b6bfed880541c444ba830
1 <?php
/**
 * Move text from the text table to external storage
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance ExternalStorage
 */
24 use MediaWiki\MainConfigNames;
25 use MediaWiki\Maintenance\UndoLog;
26 use MediaWiki\Storage\SqlBlobStore;
27 use Wikimedia\AtEase\AtEase;
28 use Wikimedia\Rdbms\IExpression;
29 use Wikimedia\Rdbms\LikeValue;
31 // @codeCoverageIgnoreStart
32 require_once __DIR__ . '/../Maintenance.php';
33 // @codeCoverageIgnoreEnd
/**
 * Maintenance script that copies the content of rows in the text table to an
 * external store and replaces each row's old_text with the URL returned by
 * that store, adding the "external" flag to old_flags.
 *
 * Rows are visited in old_id order in batches of getBatchSize() rows. Rows
 * holding a HistoryBlobStub are collected during the main pass and handed to
 * ResolveStubs afterwards, unless --skip-resolve was given.
 *
 * @ingroup Maintenance ExternalStorage
 */
class MoveToExternal extends Maintenance {
	/** @var ResolveStubs Helper used to resolve HistoryBlobStub rows after the main pass */
	private $resolveStubs;
	/**
	 * @var int Progress is reported when a counter is a multiple of this value;
	 *  0 disables reporting. Note that the main loop counts batches, while the
	 *  stub pass counts individual rows.
	 */
	private $reportingInterval;
	/** @var int First old_id to consider (inclusive) */
	private $minID;
	/** @var int Last old_id to consider (inclusive) */
	private $maxID;
	/** @var string External store type passed to the store factory, e.g. "DB" or "mwstore" */
	private $esType;
	/** @var string Location passed to the store when writing, e.g. "cluster12" */
	private $esLocation;
	/** @var int Plain (non-object) rows whose text is shorter than this many bytes are left alone */
	private $threshold;
	/** @var bool Whether blobs are compressed with gzdeflate() before being stored */
	private $gzip;
	/** @var bool Whether HistoryBlobStub rows are skipped instead of being resolved */
	private $skipResolve;
	/** @var string|null Source encoding for iconv(), or null when no conversion is done */
	private $legacyEncoding;
	/** @var bool Whether to only print what would be moved */
	private $dryRun;
	/** @var UndoLog Performs the row updates; constructed from the --undo file name */
	private $undoLog;

	public function __construct() {
		parent::__construct();

		$this->setBatchSize( 1000 );

		$this->addOption( 'start', 'start old_id', false, true, 's' );
		$this->addOption( 'end', 'end old_id', false, true, 'e' );
		$this->addOption( 'threshold', 'minimum size in bytes', false, true );
		$this->addOption( 'reporting-interval',
			'show a message after this many revisions', false, true );
		$this->addOption( 'undo', 'filename for undo SQL', false, true );

		$this->addOption( 'skip-gzip', 'Don\'t compress individual revisions' );
		$this->addOption( 'skip-resolve',
			'Don\'t replace HistoryBlobStub objects with direct external store pointers' );
		$this->addOption( 'iconv', 'Resolve legacy character encoding' );
		$this->addOption( 'dry-run', 'Don\'t modify any rows' );

		$this->addArg( 'type', 'The external store type, e.g. "DB" or "mwstore"' );
		$this->addArg( 'location', 'e.g. "cluster12" or "global-swift"' );
	}

	/**
	 * Read the command line options into properties, open the undo log and
	 * run the move.
	 *
	 * @return bool The result of doMoveToExternal()
	 */
	public function execute() {
		$this->resolveStubs = new ResolveStubs;
		$this->esType = $this->getArg( 0 ); // e.g. "DB" or "mwstore"
		$this->esLocation = $this->getArg( 1 ); // e.g. "cluster12" or "global-swift"
		$dbw = $this->getPrimaryDB();

		// Without --end, process up to the highest old_id currently in the table
		$maxID = $this->getOption( 'end' ) ?? $dbw->newSelectQueryBuilder()
			->select( 'MAX(old_id)' )
			->from( 'text' )
			->caller( __METHOD__ )->fetchField();
		$this->maxID = (int)$maxID;
		$this->minID = (int)$this->getOption( 'start', 1 );

		// Cast so that the modulo checks always operate on an integer
		$this->reportingInterval = (int)$this->getOption( 'reporting-interval', 100 );
		$this->threshold = (int)$this->getOption( 'threshold', 0 );

		if ( $this->getOption( 'skip-gzip' ) ) {
			$this->gzip = false;
		} elseif ( !function_exists( 'gzdeflate' ) ) {
			$this->fatalError( "gzdeflate() not found. " .
				"Please run with --skip-gzip if you don't want to compress revisions." );
		} else {
			$this->gzip = true;
		}

		$this->skipResolve = $this->getOption( 'skip-resolve' );

		if ( $this->getOption( 'iconv' ) ) {
			$legacyEncoding = $this->getConfig()->get( MainConfigNames::LegacyEncoding );
			if ( $legacyEncoding ) {
				$this->legacyEncoding = $legacyEncoding;
			} else {
				$this->output( "iconv requested but the wiki has no legacy encoding\n" );
			}
		}
		$this->dryRun = $this->getOption( 'dry-run', false );

		$undo = $this->getOption( 'undo' );
		try {
			$this->undoLog = new UndoLog( $undo, $dbw );
		} catch ( RuntimeException $e ) {
			$this->fatalError( "Unable to open undo log" );
		}
		$this->resolveStubs->setUndoLog( $this->undoLog );

		return $this->doMoveToExternal();
	}

	/**
	 * Walk the configured old_id range batch by batch and move every
	 * eligible row to the external store.
	 *
	 * @return bool False if any row could not be processed, true otherwise
	 */
	private function doMoveToExternal() {
		$success = true;
		$dbr = $this->getReplicaDB();

		$count = $this->maxID - $this->minID + 1;
		$blockSize = $this->getBatchSize();
		$numBlocks = ceil( $count / $blockSize );
		print "Moving text rows from {$this->minID} to {$this->maxID} to external storage\n";

		$esFactory = $this->getServiceContainer()->getExternalStoreFactory();
		$extStore = $esFactory->getStore( $this->esType );
		$numMoved = 0;
		$stubIDs = [];

		for ( $block = 0; $block < $numBlocks; $block++ ) {
			$blockStart = $block * $blockSize + $this->minID;
			$blockEnd = $blockStart + $blockSize - 1;

			if ( $this->reportingInterval && !( $block % $this->reportingInterval ) ) {
				$this->output( "oldid=$blockStart, moved=$numMoved\n" );
				$this->waitForReplication();
			}

			$res = $dbr->newSelectQueryBuilder()
				->select( [ 'old_id', 'old_flags', 'old_text' ] )
				->from( 'text' )
				->where( $this->getConditions( $blockStart, $blockEnd, $dbr ) )
				->caller( __METHOD__ )->fetchResultSet();
			foreach ( $res as $row ) {
				$text = $row->old_text;
				$id = $row->old_id;
				$flags = SqlBlobStore::explodeFlags( $row->old_flags );
				[ $text, $flags ] = $this->resolveText( $text, $flags );

				if ( $text === false ) {
					// There is nothing usable to store; never write a failed blob
					$success = false;
					continue;
				}

				if ( in_array( 'error', $flags, true ) ) {
					continue;
				} elseif ( in_array( 'object', $flags, true ) ) {
					// NOTE(review): unserialize() runs on database content here;
					// this is only safe as long as the text table is trusted.
					$obj = unserialize( $text );
					if ( $obj instanceof HistoryBlobStub ) {
						// Handle later, after CGZ resolution
						if ( !$this->skipResolve ) {
							$stubIDs[] = $id;
						}
						continue;
					} elseif ( $obj instanceof HistoryBlobCurStub ) {
						// Copy cur text to ES
						$newText = $obj->getText();
						if ( $newText === false ) {
							print "Warning: Could not fetch revision blob {$id}: {$text}\n";
							$success = false;
							continue;
						}

						// The fetched text starts with an empty flag list
						[ $text, $flags ] = $this->resolveLegacyEncoding( $newText, [] );

						if ( $text === false ) {
							print "Warning: Could not decode legacy-encoded gzip'ed revision blob {$id}: {$newText}\n";
							$success = false;
							continue;
						}

						[ $compressed, $flags ] = $this->compress( $text, $flags );
						if ( $compressed === false ) {
							print "Warning: Could not compress revision blob {$id}: {$text}\n";
							$success = false;
							continue;
						}
						$text = $compressed;
					} elseif ( $obj instanceof ConcatenatedGzipHistoryBlob ) {
						// Store as is
					} else {
						// unserialize() yields a non-object on failure, which get_class() rejects
						$className = is_object( $obj ) ? get_class( $obj ) : gettype( $obj );
						print "Warning: old_id=$id unrecognised object class \"$className\"\n";
						$success = false;
						continue;
					}
				} elseif ( strlen( $text ) < $this->threshold ) {
					// Don't move small revisions
					continue;
				} else {
					[ $decoded, $flags ] = $this->resolveLegacyEncoding( $text, $flags );
					if ( $decoded === false ) {
						// Without this check the failure would be compressed and stored
						print "Warning: Could not decode legacy-encoded revision blob {$id}\n";
						$success = false;
						continue;
					}
					[ $newText, $flags ] = $this->compress( $decoded, $flags );
					if ( $newText === false ) {
						print "Warning: Could not compress revision blob {$id}: {$decoded}\n";
						$success = false;
						continue;
					}
					$text = $newText;
				}
				$flags[] = 'external';
				$flagsString = implode( ',', $flags );

				if ( $this->dryRun ) {
					// Show the first 30 bytes with control and high bytes escaped
					$this->output( "Move $id => $flagsString " .
						addcslashes( substr( $text, 0, 30 ), "\0..\x1f\x7f..\xff" ) .
						"\n"
					);
					continue;
				}

				$url = $extStore->store( $this->esLocation, $text );
				if ( !$url ) {
					$this->fatalError( "Error writing to external storage" );
				}
				// The original row is passed along with the new values
				$moved = $this->undoLog->update(
					'text',
					[ 'old_flags' => $flagsString, 'old_text' => $url ],
					(array)$row,
					__METHOD__
				);
				if ( $moved ) {
					$numMoved++;
				} else {
					print "Update of old_id $id failed, affected zero rows\n";
					$success = false;
				}
			}
		}

		if ( count( $stubIDs ) ) {
			$this->resolveStubs( $stubIDs );
		}

		return $success;
	}

	/**
	 * Compress a blob with gzdeflate() when compression is enabled and the
	 * blob is not already flagged as gzip.
	 *
	 * @param string $text
	 * @param string[] $flags
	 * @return array [ string|false $text, string[] $flags ]; the text is
	 *  whatever gzdeflate() returned when compression was applied
	 */
	private function compress( $text, $flags ) {
		if ( $this->gzip && !in_array( 'gzip', $flags, true ) ) {
			$flags[] = 'gzip';
			$text = gzdeflate( $text );
		}
		return [ $text, $flags ];
	}

	/**
	 * Convert a blob from the legacy encoding to UTF-8 when a legacy
	 * encoding is configured and the blob is not flagged as UTF-8.
	 *
	 * A gzip-flagged blob is inflated first and loses its gzip flag. If
	 * compression is disabled for this run, a gzip-flagged blob is returned
	 * untouched instead.
	 *
	 * @param string $text
	 * @param string[] $flags
	 * @return array [ string|false $text, string[] $flags ]; the text is
	 *  false when inflating or converting failed
	 */
	private function resolveLegacyEncoding( $text, $flags ) {
		if ( $this->legacyEncoding !== null
			&& !in_array( 'utf-8', $flags, true )
			&& !in_array( 'utf8', $flags, true )
		) {
			// First decompress the entry so we don't try to convert a binary gzip to utf-8
			if ( in_array( 'gzip', $flags, true ) ) {
				if ( !$this->gzip ) {
					return [ $text, $flags ];
				}
				$flags = array_diff( $flags, [ 'gzip' ] );
				$newText = gzinflate( $text );
				if ( $newText === false ) {
					return [ false, $flags ];
				}
				$text = $newText;
			}
			AtEase::suppressWarnings();
			$newText = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $text );
			AtEase::restoreWarnings();
			if ( $newText === false ) {
				return [ false, $flags ];
			}
			$text = $newText;
			$flags[] = 'utf-8';
		}
		return [ $text, $flags ];
	}

	/**
	 * Re-read the given stub rows in batches and pass each one to
	 * ResolveStubs::resolveStub(), reporting how many were resolved.
	 *
	 * @param int[] $stubIDs old_id values collected during the main pass
	 */
	private function resolveStubs( $stubIDs ) {
		if ( $this->dryRun ) {
			print "Note: resolving stubs in dry run mode is expected to fail, " .
				"because the main blobs have not been moved to external storage.\n";
		}

		$dbr = $this->getReplicaDB();
		$this->output( "Resolving " . count( $stubIDs ) . " stubs\n" );
		$numResolved = 0;
		$numTotal = 0;
		foreach ( array_chunk( $stubIDs, $this->getBatchSize() ) as $stubBatch ) {
			$res = $dbr->newSelectQueryBuilder()
				->select( [ 'old_id', 'old_flags', 'old_text' ] )
				->from( 'text' )
				->where( [ 'old_id' => $stubBatch ] )
				->caller( __METHOD__ )->fetchResultSet();
			foreach ( $res as $row ) {
				$numResolved += $this->resolveStubs->resolveStub( $row, $this->dryRun ) ? 1 : 0;
				$numTotal++;
				if ( $this->reportingInterval
					&& $numTotal % $this->reportingInterval == 0
				) {
					$this->output( "$numTotal stubs processed\n" );
					$this->waitForReplication();
				}
			}
		}
		$this->output( "$numResolved of $numTotal stubs resolved\n" );
	}

	/**
	 * Build the WHERE conditions selecting one batch: rows whose old_id lies
	 * in [$blockStart, $blockEnd] and whose flags do not contain "external".
	 *
	 * @param int $blockStart First old_id of the batch (inclusive)
	 * @param int $blockEnd Last old_id of the batch (inclusive)
	 * @param \Wikimedia\Rdbms\IReadableDatabase $dbr Handle used to build the
	 *  expressions (type presumed from the calls made on it)
	 * @return array
	 */
	protected function getConditions( $blockStart, $blockEnd, $dbr ) {
		return [
			$dbr->expr( 'old_id', '>=', $blockStart ),
			// Upper bound of the batch; this has to be "<=" to close the range
			$dbr->expr( 'old_id', '<=', $blockEnd ),
			$dbr->expr( 'old_flags', IExpression::NOT_LIKE,
				new LikeValue( $dbr->anyString(), 'external', $dbr->anyString() ) ),
		];
	}

	/**
	 * Extension point applied to every row before it is examined. This
	 * implementation returns its input unchanged.
	 *
	 * @param string $text
	 * @param string[] $flags
	 * @return array [ string|false $text, string[] $flags ]; a false text
	 *  makes the caller skip the row and report failure
	 */
	protected function resolveText( $text, $flags ) {
		return [ $text, $flags ];
	}
}
// @codeCoverageIgnoreStart
// Name the class provided by this script, then include the file that the
// RUN_MAINTENANCE_IF_MAIN constant points to (defined outside this file;
// presumably it runs $maintClass when the script is the entry point).
$maintClass = MoveToExternal::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd