Merge "rest: Return a 400 for invalid render IDs"
[mediawiki.git] / includes / upload / UploadFromChunks.php
blob71401d918b1ed67512d5c809a24f68a103700941
1 <?php
3 use MediaWiki\Deferred\AutoCommitUpdate;
4 use MediaWiki\Deferred\DeferredUpdates;
5 use MediaWiki\Logger\LoggerFactory;
6 use MediaWiki\MediaWikiServices;
7 use MediaWiki\Request\WebRequestUpload;
8 use MediaWiki\Status\Status;
9 use MediaWiki\User\User;
10 use Psr\Log\LoggerInterface;
11 use Wikimedia\FileBackend\FileBackend;
13 /**
14 * Backend for uploading files from chunks.
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
26 * You should have received a copy of the GNU General Public License along
27 * with this program; if not, write to the Free Software Foundation, Inc.,
28 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
29 * http://www.gnu.org/copyleft/gpl.html
31 * @file
32 * @ingroup Upload
35 /**
36 * Implements uploading from chunks
38 * @ingroup Upload
39 * @author Michael Dale
41 class UploadFromChunks extends UploadFromFile {
42 /** @var LocalRepo */
43 private $repo;
44 /** @var UploadStash */
45 public $stash;
46 /** @var User */
47 public $user;
49 /** @var int|null */
50 protected $mOffset;
51 /** @var int|null */
52 protected $mChunkIndex;
53 /** @var string */
54 protected $mFileKey;
55 /** @var string|null */
56 protected $mVirtualTempPath;
58 private LoggerInterface $logger;
60 /** @noinspection PhpMissingParentConstructorInspection */
/**
 * Wire up the user, file repo and upload stash used by this chunked upload
 * (similar to UploadFromStash). The parent constructor is not called here.
 *
 * @param User $user
 * @param UploadStash|false $stash Stash to reuse; a new one is created when falsy. Default: false
 * @param FileRepo|false $repo Repo to use; the local repo is used when falsy. Default: false
 */
public function __construct( User $user, $stash = false, $repo = false ) {
	$this->user = $user;

	// Fall back to the wiki's local repo when the caller did not supply one
	$this->repo = $repo ?: MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();

	if ( !$stash ) {
		wfDebug( __METHOD__ . " creating new UploadFromChunks instance for " . $user->getId() );
		$stash = new UploadStash( $this->repo, $this->user );
	}
	$this->stash = $stash;

	$this->logger = LoggerFactory::getInstance( 'upload' );
}
/**
 * @inheritDoc
 */
public function tryStashFile( User $user, $isPartial = false ) {
	// Verify the chunk first; a verification failure becomes a fatal status
	$failure = null;
	try {
		$this->verifyChunk();
	} catch ( UploadChunkVerificationException $e ) {
		$failure = Status::newFatal( $e->msg );
	}

	return $failure ?? parent::tryStashFile( $user, $isPartial );
}
/**
 * Start a new chunk session: stash the file through the parent doStashFile(),
 * store a copy of it at the chunk 0 location and record the initial chunk
 * state in the uploadstash table.
 *
 * @param User|null $user
 * @return UploadStashFile Stashed file
 */
protected function doStashFile( ?User $user = null ) {
	// This runs when a new chunk session is created, so reset the counters
	$this->mChunkIndex = 0;
	$this->mOffset = 0;

	// Let the parent create the local stash target
	$stashed = parent::doStashFile( $user );
	$this->mStashFile = $stashed;

	// The size of the stashed file is the initial file offset
	$this->mOffset = $stashed->getSize();
	$this->mFileKey = $stashed->getFileKey();

	// Write a copy of the stashed file to the chunk 0 location first...
	$this->outputChunk( $stashed->getPath() );

	// ...then make the db table reflect the initial "chunk" state
	$this->updateChunkStatus();

	return $stashed;
}
/**
 * Continue chunk uploading
 *
 * Loads the stored chunk state for the given key from the database and
 * re-initializes the path info from the stash metadata.
 *
 * @param string $name
 * @param string $key
 * @param WebRequestUpload $webRequestUpload
 */
public function continueChunks( $name, $key, $webRequestUpload ) {
	$this->mFileKey = $key;
	$this->mUpload = $webRequestUpload;

	// Pull the chunk status for this key from the db
	$this->getChunkStatus();

	$stashInfo = $this->stash->getMetadata( $key );
	$localPath = $this->getRealPath( $stashInfo['us_path'] );
	$storedSize = $stashInfo['us_size'];

	$this->initializePathInfo( $name, $localPath, $storedSize, false );
}
/**
 * Append the final chunk and ready file for parent::performUpload()
 *
 * Concatenates all stored chunks into a temp file, schedules deletion of the
 * individual chunks, verifies the combined file and stashes it under a new
 * file key.
 *
 * @return Status Fatal when concatenation, verification, the stash hook or
 *  the stashing itself fails
 */
public function concatenateChunks() {
	$oldFileKey = $this->mFileKey;
	$chunkIndex = $this->getChunkIndex();
	$this->logger->debug(
		__METHOD__ . ' concatenate {totalChunks} chunks: {offset} inx: {curIndex}',
		[
			'offset' => $this->getOffset(),
			'totalChunks' => $this->mChunkIndex,
			'curIndex' => $chunkIndex,
			'filekey' => $oldFileKey
		]
	);

	// Collect the virtual locations of every chunk, from chunk 0 up to and
	// including the current chunk index
	$fileList = [];
	for ( $i = 0; $i <= $chunkIndex; $i++ ) {
		$fileList[] = $this->getVirtualChunkLocation( $i );
	}
	// Number of chunks being combined (the last index plus one); used for all
	// log output below instead of relying on the loop counter's final value
	$chunkCount = count( $fileList );

	// Get the file extension from the last chunk
	$ext = FileBackend::extensionFromPath( $this->mVirtualTempPath );
	// Get a 0-byte temp file to perform the concatenation at
	$tmpFile = MediaWikiServices::getInstance()->getTempFSFileFactory()
		->newTempFSFile( 'chunkedupload_', $ext );
	$tmpPath = false; // fail in concatenate()
	if ( $tmpFile ) {
		// keep alive with $this
		$tmpPath = $tmpFile->bind( $this )->getPath();
	} else {
		$this->logger->warning( "Error getting tmp file", [ 'filekey' => $oldFileKey ] );
	}

	// Concatenate the chunks at the temp file
	$tStart = microtime( true );
	$status = $this->repo->concatenate( $fileList, $tmpPath );
	$tAmount = microtime( true ) - $tStart;
	if ( !$status->isOK() ) {
		// This is a backend error and not user-related, so log is safe
		// Upload verification further on is not safe to log server side
		$this->logFileBackendStatus(
			$status,
			'[{type}] Error on concatenate {chunks} stashed files ({details})',
			[ 'chunks' => $chunkCount, 'filekey' => $oldFileKey ]
		);
		return $status;
	}

	// Delete old chunks in deferred job. Put in deferred job because deleting
	// lots of chunks can take a long time, sometimes to the point of causing
	// a timeout, and we do not want that to tank the operation. Note that chunks
	// are also automatically deleted after a set time by cleanupUploadStash.php
	// Additionally, using AutoCommitUpdate ensures that we do not delete files
	// if the main transaction is rolled back for some reason.
	DeferredUpdates::addUpdate( new AutoCommitUpdate(
		$this->repo->getPrimaryDB(),
		__METHOD__,
		function () use( $fileList, $oldFileKey ) {
			$status = $this->repo->quickPurgeBatch( $fileList );
			if ( !$status->isOK() ) {
				$this->logger->warning(
					"Could not delete chunks of {filekey} - {status}",
					[
						'status' => (string)$status,
						'filekey' => $oldFileKey,
					]
				);
			}
		}
	) );

	wfDebugLog( 'fileconcatenate', "Combined $chunkCount chunks in $tAmount seconds." );

	// File system path of the actual full temp file
	$this->setTempFile( $tmpPath );

	$ret = $this->verifyUpload();
	if ( $ret['status'] !== UploadBase::OK ) {
		$this->logger->info(
			"Verification failed for chunked upload {filekey}",
			[
				'user' => $this->user->getName(),
				'filekey' => $oldFileKey
			]
		);
		$status->fatal( $this->getVerificationErrorCode( $ret['status'] ) );

		return $status;
	}

	// Update the mTempPath and mStashFile
	// (for FileUpload or normal Stash to take over)
	$tStart = microtime( true );
	// This is a re-implementation of UploadBase::tryStashFile(), we can't call it because we
	// override doStashFile() with completely different functionality in this class...
	$error = $this->runUploadStashFileHook( $this->user );
	if ( $error ) {
		$status->fatal( ...$error );
		$this->logger->info( "Aborting stash upload due to hook - {status}",
			[
				'status' => (string)$status,
				'user' => $this->user->getName(),
				'filekey' => $this->mFileKey
			]
		);
		return $status;
	}
	try {
		$this->mStashFile = parent::doStashFile( $this->user );
	} catch ( UploadStashException $e ) {
		$this->logger->warning( "Could not stash file for {user} because {error} {msg}",
			[
				'user' => $this->user->getName(),
				'error' => get_class( $e ),
				'msg' => $e->getMessage(),
				'filekey' => $this->mFileKey
			]
		);
		$status->fatal( 'uploadstash-exception', get_class( $e ), $e->getMessage() );
		return $status;
	}

	$tAmount = microtime( true ) - $tStart;
	// @phan-suppress-next-line PhanTypeMismatchArgumentNullable tmpFile is set when tmpPath is set here
	$this->mStashFile->setLocalReference( $tmpFile ); // reuse (e.g. for getImageInfo())
	$this->logger->info( "Stashed combined ({chunks} chunks) of {oldkey} under new name {filekey}",
		[
			'chunks' => $chunkCount,
			'stashTime' => $tAmount,
			'oldpath' => $this->mVirtualTempPath,
			'filekey' => $this->mStashFile->getFileKey(),
			'oldkey' => $oldFileKey,
			'newpath' => $this->mStashFile->getPath(),
			'user' => $this->user->getName()
		]
	);
	wfDebugLog( 'fileconcatenate', "Stashed combined file ($chunkCount chunks) in $tAmount seconds." );

	return $status;
}
/**
 * Returns the virtual chunk location: the repo's virtual 'temp' URL, followed
 * by the hash path and the file key of the chunk.
 *
 * @param int $index
 * @return string
 */
private function getVirtualChunkLocation( $index ) {
	$tempZoneUrl = $this->repo->getVirtualUrl( 'temp' );
	$chunkKey = $this->getChunkFileKey( $index );
	$hashDir = $this->repo->getHashPath( $chunkKey );

	return "{$tempZoneUrl}/{$hashDir}{$chunkKey}";
}
/**
 * Add a chunk to the temporary directory
 *
 * The chunk is rejected when it would push the upload past the maximum upload
 * size, when its offset does not match the current offset, or when chunk
 * verification fails. On success the local offset and the chunk status in the
 * database are updated.
 *
 * @param string $chunkPath Path to temporary chunk file
 * @param int $chunkSize Size of the current chunk
 * @param int $offset Offset of current chunk (must match database chunk offset)
 * @throws UploadChunkFileException When the chunk cannot be stored
 * @return Status
 */
public function addChunk( $chunkPath, $chunkSize, $offset ) {
	// Get the offset before we add the chunk to the file system
	$preAppendOffset = $this->getOffset();

	if ( $preAppendOffset + $chunkSize > $this->getMaxUploadSize() ) {
		return Status::newFatal( 'file-too-large' );
	}

	// Make sure the client is uploading the correct chunk with a matching offset.
	// NOTE(review): comparison kept loose; the stored offset presumably can be a
	// numeric string when it was loaded from the database - confirm before tightening
	if ( $preAppendOffset != $offset ) {
		return Status::newFatal( 'invalid-chunk-offset' );
	}

	// Update local chunk index for the current chunk
	$this->mChunkIndex++;

	# For some reason mTempPath is set to first part
	$oldTemp = $this->mTempPath;
	$this->mTempPath = $chunkPath;
	try {
		$this->verifyChunk();
	} catch ( UploadChunkVerificationException $e ) {
		$this->logger->info( "Error verifying upload chunk {msg}",
			[
				'user' => $this->user->getName(),
				'msg' => $e->getMessage(),
				'chunkIndex' => $this->mChunkIndex,
				'filekey' => $this->mFileKey
			]
		);

		return Status::newFatal( $e->msg );
	} finally {
		// Always point mTempPath back at the original file, including when
		// verification failed, so the object is not left referring to the chunk
		$this->mTempPath = $oldTemp;
	}

	$status = $this->outputChunk( $chunkPath );
	if ( $status->isGood() ) {
		// Update local offset:
		$this->mOffset = $preAppendOffset + $chunkSize;
		// Update chunk table status db
		$this->updateChunkStatus();
	}

	return $status;
}
/**
 * Write the current chunk index and offset for this file key to the
 * uploadstash table and mark the row with the 'chunks' status.
 */
private function updateChunkStatus() {
	$offset = $this->getOffset();
	$chunkIndex = $this->getChunkIndex();

	$this->logger->info( "update chunk status for {filekey} offset: {offset} inx: {inx}",
		[
			'offset' => $offset,
			'inx' => $chunkIndex,
			'filekey' => $this->mFileKey,
			'user' => $this->user->getName()
		]
	);

	$fields = [
		'us_status' => 'chunks',
		'us_chunk_inx' => $chunkIndex,
		'us_size' => $offset
	];
	$this->repo->getPrimaryDB()
		->newUpdateQueryBuilder()
		->update( 'uploadstash' )
		->set( $fields )
		->where( [ 'us_key' => $this->mFileKey ] )
		->caller( __METHOD__ )
		->execute();
}
/**
 * Load the chunk state stored for the current file key and copy it into the
 * local chunk index, offset and virtual temp path. Nothing is changed when no
 * row exists for the key.
 */
private function getChunkStatus() {
	// Query the primary db to avoid race conditions.
	// Otherwise, if chunk upload time < replag there will be spurious errors
	$row = $this->repo->getPrimaryDB()
		->newSelectQueryBuilder()
		->select( [ 'us_chunk_inx', 'us_size', 'us_path' ] )
		->from( 'uploadstash' )
		->where( [ 'us_key' => $this->mFileKey ] )
		->caller( __METHOD__ )
		->fetchRow();

	if ( !$row ) {
		return;
	}

	$this->mChunkIndex = $row->us_chunk_inx;
	$this->mOffset = $row->us_size;
	$this->mVirtualTempPath = $row->us_path;
}
/**
 * Get the current Chunk index
 *
 * @return int Index of the current chunk, 0 when none has been set yet
 */
private function getChunkIndex() {
	$index = $this->mChunkIndex;

	return $index === null ? 0 : $index;
}
/**
 * Get the offset at which the next uploaded chunk will be appended to
 *
 * @return int Current byte offset of the chunk file set, 0 when none has been set yet
 */
public function getOffset() {
	$offset = $this->mOffset;

	return $offset === null ? 0 : $offset;
}
/**
 * Output the chunk to disk
 *
 * The chunk is imported into the repo's 'temp' zone under the file key of the
 * current chunk index.
 *
 * @param string $chunkPath
 * @throws UploadChunkFileException When the repo reports the import as not OK
 * @return Status
 */
private function outputChunk( $chunkPath ) {
	// The chunk's key is the file key plus the current chunk index
	$chunkKey = $this->getChunkFileKey();

	// Each chunk is stored under the hash path of its own key
	$hashDir = $this->repo->getHashPath( $chunkKey );
	$destination = $this->repo->getZonePath( 'temp' ) . "/{$hashDir}{$chunkKey}";
	$storeStatus = $this->repo->quickImport( $chunkPath, $destination );

	if ( $storeStatus->isOK() ) {
		return $storeStatus;
	}

	// Storing failed: log the backend status and raise its first error
	$firstError = $this->logFileBackendStatus(
		$storeStatus,
		'[{type}] Error storing chunk in "{chunkPath}" for {fileKey} ({details})',
		[ 'chunkPath' => $chunkPath, 'fileKey' => $chunkKey ]
	);
	$details = implode( '; ', $firstError );

	throw new UploadChunkFileException(
		"Error storing file in '{chunkPath}': " . $details,
		[ 'chunkPath' => $chunkPath ]
	);
}
/**
 * Build the file key of a single chunk: the upload's file key, a dot and the
 * chunk index.
 *
 * @param int|null $index Chunk index; the current chunk index is used when null
 * @return string
 */
private function getChunkFileKey( $index = null ) {
	if ( $index === null ) {
		$index = $this->getChunkIndex();
	}

	return "{$this->mFileKey}.{$index}";
}
/**
 * Verify that the chunk isn't really an evil html file
 *
 * Runs the partial-file verification with the desired destination name
 * temporarily replaced by the file key.
 *
 * @throws UploadChunkVerificationException When verifyPartialFile() returns an array
 */
private function verifyChunk() {
	// Swap in the file key so the name is verified as if it were mFileKey
	$savedDestName = $this->mDesiredDestName;
	$this->mDesiredDestName = $this->mFileKey;
	$this->mTitle = false;

	$result = $this->verifyPartialFile();

	// Put the real destination name back and reset the title again
	$this->mDesiredDestName = $savedDestName;
	$this->mTitle = false;

	if ( !is_array( $result ) ) {
		return;
	}

	throw new UploadChunkVerificationException( $result );
}
/**
 * Log a status object from FileBackend functions (via FileRepo functions) to the upload log channel.
 * Returns an array describing the first error (message key followed by its
 * parameters) so callers can build an exception message from it.
 *
 * @param Status $status
 * @param string $logMessage Log message; '{type}' is replaced by the message key of each entry
 * @param array $context Extra log context; 'details' and 'user' are added per entry
 * @return array First error, else first warning, else [ 'unknown', 'no error recorded' ]
 */
private function logFileBackendStatus( Status $status, string $logMessage, array $context = [] ): array {
	$firstError = null;
	$firstWarning = null;

	foreach ( $status->getErrors() as $entry ) {
		$messageKey = $entry['message'];
		$params = $entry['params'];

		// Each message key is a distinct file backend error situation and should
		// appear as its own point in aggregated stats, so it goes into the message
		$typedMessage = str_replace( '{type}', $messageKey, $logMessage );

		// The parameters often contain the failing datacenter or file names and
		// should stay out of aggregated stats, so they go into the context
		$context['details'] = implode( '; ', $params );
		$context['user'] = $this->user->getName();

		if ( $entry['type'] === 'error' ) {
			// Only the first error of the list is kept for the exception text
			if ( $firstError === null ) {
				$firstError = [ $messageKey, ...$params ];
			}
			$this->logger->error( $typedMessage, $context );
		} else {
			// The first warning is the fallback when no error is found
			if ( $firstWarning === null ) {
				$firstWarning = [ $messageKey, ...$params ];
			}
			$this->logger->warning( $typedMessage, $context );
		}
	}

	if ( $firstError !== null ) {
		return $firstError;
	}
	if ( $firstWarning !== null ) {
		return $firstWarning;
	}

	return [ 'unknown', 'no error recorded' ];
}