3 use MediaWiki\Deferred\AutoCommitUpdate
;
4 use MediaWiki\Deferred\DeferredUpdates
;
5 use MediaWiki\Logger\LoggerFactory
;
6 use MediaWiki\MediaWikiServices
;
7 use MediaWiki\Request\WebRequestUpload
;
8 use MediaWiki\Status\Status
;
9 use MediaWiki\User\User
;
10 use Psr\Log\LoggerInterface
;
11 use Wikimedia\FileBackend\FileBackend
;
14 * Backend for uploading files from chunks.
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
26 * You should have received a copy of the GNU General Public License along
27 * with this program; if not, write to the Free Software Foundation, Inc.,
28 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
29 * http://www.gnu.org/copyleft/gpl.html
36 * Implements uploading from chunks
39 * @author Michael Dale
41 class UploadFromChunks
extends UploadFromFile
{
44 /** @var UploadStash */
// Index of the most recently written chunk: reset to 0 by doStashFile(), incremented by
// addChunk(), reloaded from the database by getChunkStatus(), and read through
// getChunkIndex(), which falls back to 0 while this is still null.
// NOTE(review): the @var annotation directly above does not describe this property — confirm
// against the complete file which declaration it belongs to.
52 protected $mChunkIndex;
// Loaded from the us_path column of the uploadstash row in getChunkStatus().
55 /** @var string|null */
56 protected $mVirtualTempPath;
// Logger for the 'upload' channel; assigned in the constructor.
58 private LoggerInterface
$logger;
60 /** @noinspection PhpMissingParentConstructorInspection */
63 * Setup local pointers to stash, repo and user (similar to UploadFromStash)
66 * @param UploadStash|false $stash Default: false
67 * @param FileRepo|false $repo Default: false
69 public function __construct( User
$user, $stash = false, $repo = false ) {
// Wires up the repo, the stash and the logger that the rest of the class relies on.
// The local repo is looked up through the service container.
// NOTE(review): presumably this is only the fallback for $repo === false — the branching
// lines are not visible here; confirm.
75 $this->repo
= MediaWikiServices
::getInstance()->getRepoGroup()->getLocalRepo();
// A caller-supplied stash is stored as given ...
79 $this->stash
= $stash;
// ... and a new UploadStash is built on the repo for this user.
// NOTE(review): presumably only when no stash was passed in — confirm.
81 wfDebug( __METHOD__
. " creating new UploadFromChunks instance for " . $user->getId() );
82 $this->stash
= new UploadStash( $this->repo
, $this->user
);
// Everything this class logs goes to the 'upload' channel.
85 $this->logger
= LoggerFactory
::getInstance( 'upload' );
91 public function tryStashFile( User
$user, $isPartial = false ) {
// A chunk verification failure is not propagated: it is converted into a fatal Status
// built from the exception's msg property.
// NOTE(review): the statement guarded by this catch is not visible here.
94 } catch ( UploadChunkVerificationException
$e ) {
95 return Status
::newFatal( $e->msg
);
// No verification error: hand over to the parent implementation with the same arguments.
98 return parent
::tryStashFile( $user, $isPartial );
102 * Calls the parent doStashFile and updates the uploadstash table to handle "chunks"
104 * @param User|null $user
105 * @return UploadStashFile Stashed file
107 protected function doStashFile( ?User
$user = null ) {
108 // Stashing a file is what starts a new chunk session, so the chunk index begins at 0:
109 $this->mChunkIndex
= 0;
112 // Create a local stash target
113 $this->mStashFile
= parent
::doStashFile( $user );
114 // Update the initial file offset (based on file size)
115 $this->mOffset
= $this->mStashFile
->getSize();
// The stash file's key becomes the key of the whole chunked upload; chunk file keys
// (getChunkFileKey()) and the uploadstash row lookup are both derived from it.
116 $this->mFileKey
= $this->mStashFile
->getFileKey();
118 // Output a copy of this first to chunk 0 location:
119 $this->outputChunk( $this->mStashFile
->getPath() );
121 // Update db table to reflect initial "chunk" state
122 $this->updateChunkStatus();
124 return $this->mStashFile
;
128 * Continue chunk uploading
130 * @param string $name
132 * @param WebRequestUpload $webRequestUpload
134 public function continueChunks( $name, $key, $webRequestUpload ) {
// Resume an existing chunked upload identified by $key, with $webRequestUpload as the
// incoming upload.
135 $this->mFileKey
= $key;
136 $this->mUpload
= $webRequestUpload;
137 // Get the chunk status from the db:
138 $this->getChunkStatus();
// Re-initialise the path info from the stash metadata (path and size) stored for this key.
140 $metadata = $this->stash
->getMetadata( $key );
141 $this->initializePathInfo( $name,
142 $this->getRealPath( $metadata['us_path'] ),
143 $metadata['us_size'],
149 * Append the final chunk and ready file for parent::performUpload()
152 public function concatenateChunks() {
// Joins every stored chunk into one temp file, verifies the result, and stashes it under a
// new key; the individual chunk files are purged in a deferred update.
153 $oldFileKey = $this->mFileKey
;
154 $chunkIndex = $this->getChunkIndex();
155 $this->logger
->debug(
156 __METHOD__
. ' concatenate {totalChunks} chunks: {offset} inx: {curIndex}',
158 'offset' => $this->getOffset(),
159 'totalChunks' => $this->mChunkIndex
,
160 'curIndex' => $chunkIndex,
161 'filekey' => $oldFileKey
165 // Concatenate all the chunks to mVirtualTempPath
167 // Chunk 0 is a copy of the initial stash file (see doStashFile()), so the list covers indexes 0 through $chunkIndex
168 for ( $i = 0; $i <= $chunkIndex; $i++
) {
169 $fileList[] = $this->getVirtualChunkLocation( $i );
172 // Get the file extension from the stash path recorded for this upload (us_path)
173 $ext = FileBackend
::extensionFromPath( $this->mVirtualTempPath
);
174 // Get a 0-byte temp file to perform the concatenation at
175 $tmpFile = MediaWikiServices
::getInstance()->getTempFSFileFactory()
176 ->newTempFSFile( 'chunkedupload_', $ext );
177 $tmpPath = false; // fail in concatenate()
179 // keep alive with $this
180 $tmpPath = $tmpFile->bind( $this )->getPath();
182 $this->logger
->warning( "Error getting tmp file", [ 'filekey' => $oldFileKey ] );
185 // Concatenate the chunks at the temp file
186 $tStart = microtime( true );
187 $status = $this->repo
->concatenate( $fileList, $tmpPath );
188 $tAmount = microtime( true ) - $tStart;
189 if ( !$status->isOK() ) {
190 // This is a backend error and not user-related, so log is safe
191 // Upload verification further on is not safe to log server side
192 $this->logFileBackendStatus(
194 '[{type}] Error on concatenate {chunks} stashed files ({details})',
195 [ 'chunks' => $chunkIndex, 'filekey' => $oldFileKey ]
199 // Delete old chunks in deferred job. Put in deferred job because deleting
200 // lots of chunks can take a long time, sometimes to the point of causing
201 // a timeout, and we do not want that to tank the operation. Note that chunks
202 // are also automatically deleted after a set time by cleanupUploadStash.php
203 // Additionally, using AutoCommitUpdate ensures that we do not delete files
204 // if the main transaction is rolled back for some reason.
205 DeferredUpdates
::addUpdate( new AutoCommitUpdate(
206 $this->repo
->getPrimaryDB(),
208 function () use( $fileList, $oldFileKey ) {
209 $status = $this->repo
->quickPurgeBatch( $fileList );
210 if ( !$status->isOK() ) {
211 $this->logger
->warning(
212 "Could not delete chunks of {filekey} - {status}",
214 'status' => (string)$status,
215 'filekey' => $oldFileKey,
// The loop above leaves $i at $chunkIndex + 1, i.e. the number of chunks that were combined.
223 wfDebugLog( 'fileconcatenate', "Combined $i chunks in $tAmount seconds." );
225 // File system path of the actual full temp file
226 $this->setTempFile( $tmpPath );
228 $ret = $this->verifyUpload();
229 if ( $ret['status'] !== UploadBase
::OK
) {
231 "Verification failed for chunked upload {filekey}",
233 'user' => $this->user
->getName(),
234 'filekey' => $oldFileKey
237 $status->fatal( $this->getVerificationErrorCode( $ret['status'] ) );
242 // Update the mTempPath and mStashFile
243 // (for FileUpload or normal Stash to take over)
244 $tStart = microtime( true );
245 // This is a re-implementation of UploadBase::tryStashFile(), we can't call it because we
246 // override doStashFile() with completely different functionality in this class...
247 $error = $this->runUploadStashFileHook( $this->user
);
249 $status->fatal( ...$error );
250 $this->logger
->info( "Aborting stash upload due to hook - {status}",
252 'status' => (string)$status,
253 'user' => $this->user
->getName(),
254 'filekey' => $this->mFileKey
// Call the parent's doStashFile() directly: this class's own override starts a new chunk
// session (resets the chunk index, writes chunk 0) rather than stashing a finished file.
260 $this->mStashFile
= parent
::doStashFile( $this->user
);
261 } catch ( UploadStashException
$e ) {
262 $this->logger
->warning( "Could not stash file for {user} because {error} {msg}",
264 'user' => $this->user
->getName(),
265 'error' => get_class( $e ),
266 'msg' => $e->getMessage(),
267 'filekey' => $this->mFileKey
270 $status->fatal( 'uploadstash-exception', get_class( $e ), $e->getMessage() );
274 $tAmount = microtime( true ) - $tStart;
275 // @phan-suppress-next-line PhanTypeMismatchArgumentNullable tmpFile is set when tmpPath is set here
276 $this->mStashFile
->setLocalReference( $tmpFile ); // reuse (e.g. for getImageInfo())
277 $this->logger
->info( "Stashed combined ({chunks} chunks) of {oldkey} under new name {filekey}",
280 'stashTime' => $tAmount,
281 'oldpath' => $this->mVirtualTempPath
,
282 'filekey' => $this->mStashFile
->getFileKey(),
283 'oldkey' => $oldFileKey,
284 'newpath' => $this->mStashFile
->getPath(),
285 'user' => $this->user
->getName()
288 wfDebugLog( 'fileconcatenate', "Stashed combined file ($i chunks) in $tAmount seconds." );
294 * Returns the virtual chunk location:
298 private function getVirtualChunkLocation( $index ) {
// Built from the virtual URL of the repo's 'temp' zone, the hash path computed from the
// chunk's file key, and the chunk's file key itself (see getChunkFileKey()).
299 return $this->repo
->getVirtualUrl( 'temp' ) .
301 $this->repo
->getHashPath(
302 $this->getChunkFileKey( $index )
304 $this->getChunkFileKey( $index );
308 * Add a chunk to the temporary directory
310 * @param string $chunkPath Path to temporary chunk file
311 * @param int $chunkSize Size of the current chunk
312 * @param int $offset Offset of current chunk ( must match database chunk offset )
315 public function addChunk( $chunkPath, $chunkSize, $offset ) {
316 // Get the offset before we add the chunk to the file system
317 $preAppendOffset = $this->getOffset();
// Reject the chunk if appending it would push the total past the maximum upload size.
319 if ( $preAppendOffset +
$chunkSize > $this->getMaxUploadSize() ) {
320 $status = Status
::newFatal( 'file-too-large' );
322 // Make sure the client is uploading the correct chunk with a matching offset.
// NOTE(review): the comparison is loose (==); presumably because the stored offset can come
// back from the database as a numeric string — confirm before tightening it to ===.
323 if ( $preAppendOffset == $offset ) {
324 // Update local chunk index for the current chunk
325 $this->mChunkIndex++
;
// Point mTempPath at the incoming chunk while it is verified, then put the old value back.
327 # For some reason mTempPath is set to first part
328 $oldTemp = $this->mTempPath
;
329 $this->mTempPath
= $chunkPath;
330 $this->verifyChunk();
331 $this->mTempPath
= $oldTemp;
332 } catch ( UploadChunkVerificationException
$e ) {
333 $this->logger
->info( "Error verifying upload chunk {msg}",
335 'user' => $this->user
->getName(),
336 'msg' => $e->getMessage(),
337 'chunkIndex' => $this->mChunkIndex
,
338 'filekey' => $this->mFileKey
342 return Status
::newFatal( $e->msg
);
// The chunk passed verification: store it, and only advance the offset and the database
// state when storing reported a good status.
344 $status = $this->outputChunk( $chunkPath );
345 if ( $status->isGood() ) {
346 // Update local offset:
347 $this->mOffset
= $preAppendOffset +
$chunkSize;
348 // Update chunk table status db
349 $this->updateChunkStatus();
352 $status = Status
::newFatal( 'invalid-chunk-offset' );
360 * Update the chunk db table with the current status:
362 private function updateChunkStatus() {
363 $this->logger
->info( "update chunk status for {filekey} offset: {offset} inx: {inx}",
365 'offset' => $this->getOffset(),
366 'inx' => $this->getChunkIndex(),
367 'filekey' => $this->mFileKey
,
368 'user' => $this->user
->getName()
// Persist the in-memory chunk index and byte offset on the uploadstash row for this file
// key, and mark the row with status 'chunks'.
372 $dbw = $this->repo
->getPrimaryDB();
373 $dbw->newUpdateQueryBuilder()
374 ->update( 'uploadstash' )
376 'us_status' => 'chunks',
377 'us_chunk_inx' => $this->getChunkIndex(),
378 'us_size' => $this->getOffset()
380 ->where( [ 'us_key' => $this->mFileKey
] )
381 ->caller( __METHOD__
)->execute();
385 * Get the chunk db state and populate the relevant local values
387 private function getChunkStatus() {
388 // Read from the primary db to avoid race conditions.
389 // Otherwise, if chunk upload time < replag there will be spurious errors
390 $dbw = $this->repo
->getPrimaryDB();
391 $row = $dbw->newSelectQueryBuilder()
392 ->select( [ 'us_chunk_inx', 'us_size', 'us_path' ] )
393 ->from( 'uploadstash' )
394 ->where( [ 'us_key' => $this->mFileKey
] )
395 ->caller( __METHOD__
)->fetchRow();
// Copy the stored chunk index, byte offset and stash path into the local fields.
// NOTE(review): no guard for a missing row is visible in this excerpt — confirm that $row
// is checked before it is dereferenced.
398 $this->mChunkIndex
= $row->us_chunk_inx
;
399 $this->mOffset
= $row->us_size
;
400 $this->mVirtualTempPath
= $row->us_path
;
/**
 * Report which chunk the upload is currently at.
 *
 * @return int Index of the current chunk; 0 while no index has been recorded
 */
private function getChunkIndex() {
	return isset( $this->mChunkIndex ) ? $this->mChunkIndex : 0;
}
/**
 * Report the position at which the next uploaded chunk will be appended.
 *
 * @return int Current byte offset of the chunk file set; 0 while no offset has been recorded
 */
public function getOffset() {
	return isset( $this->mOffset ) ? $this->mOffset : 0;
}
421 * Output the chunk to disk
423 * @param string $chunkPath
424 * @throws UploadChunkFileException
427 private function outputChunk( $chunkPath ) {
428 // Key is fileKey + chunk index
429 $fileKey = $this->getChunkFileKey();
431 // Store the chunk per its indexed fileKey:
// Destination is the repo's 'temp' zone path, then the hash path, then the chunk key.
432 $hashPath = $this->repo
->getHashPath( $fileKey );
433 $storeStatus = $this->repo
->quickImport( $chunkPath,
434 $this->repo
->getZonePath( 'temp' ) . "/{$hashPath}{$fileKey}" );
436 // Check for error in stashing the chunk:
437 if ( !$storeStatus->isOK() ) {
438 $error = $this->logFileBackendStatus(
440 '[{type}] Error storing chunk in "{chunkPath}" for {fileKey} ({details})',
441 [ 'chunkPath' => $chunkPath, 'fileKey' => $fileKey ]
// '{chunkPath}' has no $ sign, so PHP does not interpolate it: it stays a literal
// placeholder in the message, and the actual path is passed in the context array given as
// the second constructor argument.
443 throw new UploadChunkFileException( "Error storing file in '{chunkPath}': " .
444 implode( '; ', $error ), [ 'chunkPath' => $chunkPath ] );
/**
 * Build the key under which a single chunk is stored: the upload's file key,
 * a dot, and the chunk index.
 *
 * @param int|null $index Chunk index; null selects the current chunk index
 * @return string
 */
private function getChunkFileKey( $index = null ) {
	if ( $index === null ) {
		$index = $this->getChunkIndex();
	}

	return "{$this->mFileKey}.{$index}";
}
/**
 * Check that a single chunk is not really an evil HTML file in disguise.
 *
 * The partial-file verification is run with mDesiredDestName temporarily set to
 * mFileKey; the previous name is put back before the result is evaluated.
 *
 * @throws UploadChunkVerificationException When the partial-file verification returns an array
 */
private function verifyChunk() {
	// Reset mDesiredDestName here so the name is verified as if it were mFileKey
	$savedDestName = $this->mDesiredDestName;
	$this->mDesiredDestName = $this->mFileKey;
	$this->mTitle = false;

	$verification = $this->verifyPartialFile();

	// Put the requested name back and clear mTitle again
	$this->mDesiredDestName = $savedDestName;
	$this->mTitle = false;

	if ( !is_array( $verification ) ) {
		return;
	}

	throw new UploadChunkVerificationException( $verification );
}
473 * Log a status object from FileBackend functions (via FileRepo functions) to the upload log channel.
474 * Return an array with the first error to build up an exception message
476 * @param Status $status
477 * @param string $logMessage
478 * @param array $context
481 private function logFileBackendStatus( Status
$status, string $logMessage, array $context = [] ): array {
// Logs every entry of $status to the upload channel and returns the message key plus
// parameters of the first error, or of the first warning when there is no error.
482 $logger = $this->logger
;
483 $errorToThrow = null;
484 $warningToThrow = null;
486 foreach ( $status->getErrors() as $errorItem ) {
487 // Each message key stands for a distinct error situation in the file backend. Substituting it
488 // into the log message makes every situation show up as its own entry in aggregated stats.
489 $logMessageType = str_replace( '{type}', $errorItem['message'], $logMessage );
491 // The message arguments often contain the name of the failing datacenter or file names.
492 // They should not show up in aggregated stats, so they go into the context instead.
493 $context['details'] = implode( '; ', $errorItem['params'] );
494 $context['user'] = $this->user
->getName();
496 if ( $errorItem['type'] === 'error' ) {
497 // Use the first error of the list for the exception text
498 $errorToThrow ??
= [ $errorItem['message'], ...$errorItem['params'] ];
499 $logger->error( $logMessageType, $context );
501 // When no error is found, fall back to the first warning
502 $warningToThrow ??
= [ $errorItem['message'], ...$errorItem['params'] ];
503 $logger->warning( $logMessageType, $context );
// Preference order: first error, then first warning, then a fixed placeholder pair when the
// status carried neither.
506 return $errorToThrow ??
$warningToThrow ??
[ 'unknown', 'no error recorded' ];