Merge "Special:Upload should not crash on failing previews"
[mediawiki.git] / includes / media / MediaHandler.php
blob2a735a217785d71cac7848952614213fd19519ba
1 <?php
2 /**
3 * Media-handling base classes and generic functionality.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
21 * @ingroup Media
23 use MediaWiki\MediaWikiServices;
25 /**
26 * Base media handler class
28 * @ingroup Media
30 abstract class MediaHandler {
/** Flag for doTransform(): when set in $flags, the transform itself is not performed */
const TRANSFORM_LATER = 1;

/** isMetadataValid() result: the stored metadata is fine */
const METADATA_GOOD = true;

/** isMetadataValid() result: the stored metadata is bad and gets reloaded from the file */
const METADATA_BAD = false;

/** isMetadataValid() result: the stored metadata is old but backwards compatible */
const METADATA_COMPATIBLE = 2;

/**
 * Max length of error logged by logErrorForExternalProcess()
 */
const MAX_ERR_LOG_SIZE = 65535;
/**
 * Look up the MediaHandler for a MIME type.
 *
 * The lookup is delegated to the MediaHandlerFactory service provided by
 * MediaWikiServices.
 *
 * @param string $type MIME type
 * @return MediaHandler|bool
 */
public static function getHandler( $type ) {
	$factory = MediaWikiServices::getInstance()->getMediaHandlerFactory();

	return $factory->getHandler( $type );
}
/**
 * Map magic word IDs to parameter names.
 *
 * The parser uses this associative array to identify parameters.
 *
 * @return array Magic word ID => parameter name
 */
abstract public function getParamMap();
/**
 * Check a thumbnail parameter at parse time.
 *
 * Implementations return true to accept the parameter and false to
 * reject it; on rejection the parser does something quiet and forgiving.
 *
 * @param string $name Parameter name
 * @param mixed $value Parameter value
 * @return bool
 */
abstract public function validateParam( $name, $value );
/**
 * Flatten a parameter array into a string suitable for use in filenames.
 *
 * @param array $params Parameters that have already been through normaliseParams()
 * @return string
 */
abstract public function makeParamString( $params );
/**
 * Inverse of makeParamString(): turn a parameter string back into an array.
 *
 * @param string $str Parameter string without the file name (e.g. 122px)
 * @return array|bool Parameter array, or false on failure
 */
abstract public function parseParamString( $str );
/**
 * Adjust the parameter array in place so that it is ready for transformation.
 *
 * Implementations should be idempotent, and should return false when the
 * parameters are unacceptable and the transform should fail.
 *
 * @param File $image
 * @param array &$params Modified by reference
 * @return bool
 */
abstract public function normaliseParams( $image, &$params );
/**
 * Determine width, height and bit depth directly from a file.
 *
 * The result has the shape returned by PHP's getimagesize(), or is false
 * when the size can't be determined. The values are stored in the database
 * in the img_width, img_height and img_bits fields.
 *
 * @note For a multipage file, return the width and height of the first page.
 *
 * @param File|FSFile $image The image object, or false if there isn't one.
 *   Warning, FSFile::getPropsFromPath might pass an FSFile instead of File (!)
 * @param string $path The filename
 * @return array|bool Follows the format of PHP's getimagesize(); see
 *   https://secure.php.net/getimagesize. MediaWiki only ever uses the first
 *   two array keys (width and height) and the 'bits' associative key; every
 *   other key is ignored. 'bits' is optional because not all formats have a
 *   notion of "bitdepth". False on failure.
 */
abstract public function getImageSize( $image, $path );
/**
 * Produce the handler-specific metadata saved in the img_metadata field.
 *
 * This base implementation returns an empty string.
 *
 * @param File|FSFile $image The image object, or false if there isn't one.
 *   Warning, FSFile::getPropsFromPath might pass an FSFile instead of File (!)
 * @param string $path The filename
 * @return string Metadata in PHP serialized form (run through serialize())
 */
public function getMetadata( $image, $path ) {
	return '';
}
/**
 * Build the metadata version string.
 *
 * This is not used for validating metadata. The API uses it when returning
 * metadata, since API content formats should stay the same over time, so
 * that things using ForeignApiRepo can keep backwards compatibility.
 *
 * All core media handlers share one version number. Extensions can use the
 * GetMetadataVersion hook to append a unique string to the list, e.g. a
 * handler named 'foo' with metadata version 3 might append 'foo=3'. With a
 * core version of 2 the resulting string would be '2;foo=3'.
 *
 * @return string Version string, parts joined with ';'
 */
public static function getMetadataVersion() {
	// The first entry is the core metadata version; hook handlers may append.
	$parts = [ '2' ];
	Hooks::run( 'GetMetadataVersion', [ &$parts ] );

	return implode( ';', $parts );
}
/**
 * Convert metadata to another version.
 *
 * This base implementation performs no conversion and ignores $version;
 * it exists so that media handlers can convert between metadata versions.
 * An array is returned untouched, anything else is passed to unserialize()
 * with warnings suppressed.
 *
 * @param string|array $metadata Metadata array (serialized if string)
 * @param int $version Target version
 * @return array|mixed The given array, or the result of unserialize()
 *   (which is false when the string cannot be unserialized)
 */
public function convertMetadataVersion( $metadata, $version = 1 ) {
	if ( is_array( $metadata ) ) {
		return $metadata;
	}

	// Unserialize so that the caller gets a consistent kind of return value.
	MediaWiki\suppressWarnings();
	$decoded = unserialize( $metadata );
	MediaWiki\restoreWarnings();

	return $decoded;
}
/**
 * Describe the type of metadata, for display purposes.
 *
 * This base implementation returns false.
 *
 * @note This method is currently unused.
 * @param File $image
 * @return string|bool
 */
public function getMetadataType( $image ) {
	return false;
}
/**
 * Tell whether a metadata string is valid for this handler.
 *
 * Possible results:
 *  - MediaHandler::METADATA_BAD (or false): Image will reload the metadata
 *    from the file and update the database.
 *  - MediaHandler::METADATA_GOOD: the metadata is a-ok.
 *  - MediaHandler::METADATA_COMPATIBLE: the metadata is old but backwards
 *    compatible (which may or may not trigger a metadata reload).
 *
 * This base implementation always reports METADATA_GOOD.
 *
 * @note Returning self::METADATA_BAD will trigger a metadata reload from
 *  file on page view. Always returning this from a broken file, or suddenly
 *  triggering as bad metadata for a large number of files can cause
 *  performance problems.
 * @param File $image
 * @param string $metadata The metadata in serialized form
 * @return bool|int One of the METADATA_* constants
 */
public function isMetadataValid( $image, $metadata ) {
	return self::METADATA_GOOD;
}
/**
 * Get an array of standard (FormatMetadata type) metadata values.
 *
 * The data is largely the same as that from getMetadata(), but formatted
 * in a standard, stable, handler-independent way. The idea is that some
 * values like ImageDescription or Artist are universal and should be
 * retrievable in a handler generic way.
 *
 * The properties are of the kind that the FormatMetadata class can handle.
 * They are exposed to the user via the filemetadata parser function.
 *
 * The response format is described at
 * https://www.mediawiki.org/wiki/Manual:File_metadata_handling
 * In short: the response is an associative array of properties keyed by
 * name, but the value can be complex. You probably want to call one of the
 * FormatMetadata::flatten* functions on the property values before using
 * them, or call FormatMetadata::getFormattedData() on the full response
 * array, which transforms all values into prettified, human-readable text.
 *
 * Subclasses overriding this function must return a value which is a valid
 * API response fragment (all associative array keys are valid XML tagnames).
 *
 * If the file simply has no metadata but the handler supports this
 * interface, return an empty array, not false.
 *
 * This base implementation returns false.
 *
 * @param File $file
 * @return array|bool False if interface not supported
 * @since 1.23
 */
public function getCommonMetaArray( File $file ) {
	return false;
}
/**
 * Get a MediaTransformOutput object representing an alternate of the
 * transformed output which will call an intermediary thumbnail assist script.
 *
 * Used when the repository has a thumbnailScriptUrl option configured.
 * Returning false makes the caller fall back to the regular getTransform();
 * this base implementation always does so.
 *
 * @param File $image
 * @param string $script
 * @param array $params
 * @return bool|ThumbnailImage
 */
public function getScriptedTransform( $image, $script, $params ) {
	return false;
}
/**
 * Get a MediaTransformOutput object representing the transformed output,
 * without actually doing the transform.
 *
 * Forwards to doTransform() with the self::TRANSFORM_LATER flag.
 *
 * @param File $image The image object
 * @param string $dstPath Filesystem destination path
 * @param string $dstUrl Destination URL to use in output HTML
 * @param array $params Arbitrary set of parameters validated by $this->validateParam()
 * @return MediaTransformOutput
 */
final public function getTransform( $image, $dstPath, $dstUrl, $params ) {
	$flags = self::TRANSFORM_LATER;

	return $this->doTransform( $image, $dstPath, $dstUrl, $params, $flags );
}
/**
 * Get a MediaTransformOutput object representing the transformed output.
 *
 * The transform is carried out unless $flags contains self::TRANSFORM_LATER.
 *
 * @param File $image The image object
 * @param string $dstPath Filesystem destination path
 * @param string $dstUrl Destination URL to use in output HTML
 * @param array $params Arbitrary set of parameters validated by $this->validateParam()
 *   Note: These parameters have *not* gone through $this->normaliseParams()
 * @param int $flags A bitfield, may contain self::TRANSFORM_LATER
 * @return MediaTransformOutput
 */
abstract public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 );
/**
 * Get the thumbnail extension and MIME type for a given source MIME type.
 *
 * The original extension is kept unless it is empty or MimeMagic reports
 * that it does not match the MIME type (a strict false). In that case the
 * first of the space-separated extensions MimeMagic lists for the MIME type
 * is used, if there is one.
 *
 * @param string $ext Extension of original file
 * @param string $mime MIME type of original file
 * @param array $params Handler specific rendering parameters (not used here)
 * @return array Thumbnail extension and MIME type
 */
public function getThumbType( $ext, $mime, $params = null ) {
	$mimeMagic = MimeMagic::singleton();

	// The extension is correct (true) or the MIME type is unknown to
	// MediaWiki (null)
	$keepExtension = $ext && $mimeMagic->isMatchingExtension( $ext, $mime ) !== false;
	if ( $keepExtension ) {
		return [ $ext, $mime ];
	}

	// The extension is not valid for this MIME type and we do
	// recognize the MIME type
	$knownExtensions = $mimeMagic->getExtensionsForType( $mime );
	if ( $knownExtensions ) {
		return [ strtok( $knownExtensions, ' ' ), $mime ];
	}

	return [ $ext, $mime ];
}
/**
 * Get useful response headers for GET/HEAD requests for a file with the
 * given metadata. This base implementation supplies none.
 *
 * @param mixed $metadata Result of the getMetadata() function of this handler for a file
 * @return array
 */
public function getStreamHeaders( $metadata ) {
	return [];
}
/**
 * Whether the handled types can be transformed.
 * This base implementation answers true.
 *
 * @param File $file
 * @return bool
 */
public function canRender( $file ) {
	return true;
}
/**
 * Whether the handled types cannot be displayed directly in a browser but
 * can be rendered. This base implementation answers false.
 *
 * @param File $file
 * @return bool
 */
public function mustRender( $file ) {
	return false;
}
/**
 * Whether the type has multi-page capabilities.
 * This base implementation answers false.
 *
 * @param File $file
 * @return bool
 */
public function isMultiPage( $file ) {
	return false;
}
/**
 * Page count for a multi-page document, or false if unsupported or unknown.
 * This base implementation returns false.
 *
 * @param File $file
 * @return int|bool
 */
public function pageCount( File $file ) {
	return false;
}
/**
 * Whether the material is vectorized, so that scaling is lossless.
 * This base implementation answers false.
 *
 * @param File $file
 * @return bool
 */
public function isVectorized( $file ) {
	return false;
}
/**
 * Whether the material is an image and is animated.
 * In particular, video material need not return true.
 * This base implementation answers false.
 *
 * @note Before 1.20, this was a method of ImageHandler only
 *
 * @param File $file
 * @return bool
 */
public function isAnimatedImage( $file ) {
	return false;
}
/**
 * Whether the thumbnail can be animated when the material is animated.
 * This base implementation answers true.
 *
 * @since 1.20
 *
 * @param File $file
 * @return bool If material is not animated, handler may return any value.
 */
public function canAnimateThumbnail( $file ) {
	return true;
}
/**
 * Whether the handler is enabled; false means it is disabled for all files.
 * This base implementation answers true.
 *
 * @return bool
 */
public function isEnabled() {
	return true;
}
/**
 * Get an associative array of page dimensions.
 *
 * Currently "width" and "height" are understood, but this might be
 * expanded in the future. False is returned if the dimensions are unknown.
 *
 * This base implementation ignores $page: it asks getImageSize() about the
 * file's local reference path and uses the first two entries of the result.
 * Handlers for paged media (e.g. DjVuHandler) are expected to override it
 * so that it gives the correct results for each specific page.
 *
 * @note For non-paged media, use getImageSize.
 *
 * @param File $image
 * @param int $page What page to get dimensions of
 * @return array|bool
 */
public function getPageDimensions( File $image, $page ) {
	$size = $this->getImageSize( $image, $image->getLocalRefPath() );
	if ( !$size ) {
		return false;
	}

	return [
		'width' => $size[0],
		'height' => $size[1]
	];
}
/**
 * Generic getter for the text layer of a page.
 * Currently overloaded by PDF and DjVu handlers; this base implementation
 * returns false.
 *
 * @param File $image
 * @param int $page Page number to get information for
 * @return bool|string Page text or false when no text found or if
 *   unsupported.
 */
public function getPageText( File $image, $page ) {
	return false;
}
/**
 * Get the text of the entire document.
 *
 * When the file reports no page count, the result of getPageText() for
 * page 1 is returned as is. Otherwise the string results of getPageText()
 * for pages 1..N are concatenated, each followed by a newline; non-string
 * results are skipped.
 *
 * @param File $file
 * @return bool|string The text of the document or false if unsupported.
 */
public function getEntireText( File $file ) {
	$pageTotal = $file->pageCount();
	if ( !$pageTotal ) {
		// Not a multipage document
		return $this->getPageText( $file, 1 );
	}

	$text = '';
	$pageNo = 1;
	while ( $pageNo <= $pageTotal ) {
		$pageText = $this->getPageText( $file, $pageNo );
		if ( is_string( $pageText ) ) {
			$text .= $pageText . "\n";
		}
		$pageNo++;
	}

	return $text !== '' ? $text : false;
}
/**
 * Get an array structure that looks like this:
 *
 * [
 *    'visible' => [
 *       'Human-readable name' => 'Human readable value',
 *       ...
 *    ],
 *    'collapsed' => [
 *       'Human-readable name' => 'Human readable value',
 *       ...
 *    ]
 * ]
 *
 * The UI formats this into a table where the visible fields are always
 * visible, and the collapsed fields are optionally visible.
 *
 * The function should return false if there is no metadata to display;
 * this base implementation always does.
 *
 * @todo FIXME: This interface is not very flexible. The media handler
 * should generate HTML instead. It can do all the formatting according
 * to some standard. That makes it possible to do things like visual
 * indication of grouped and chained streams in ogg container files.
 * @param File $image
 * @param bool|IContextSource $context Context to use (optional)
 * @return array|bool
 */
public function formatMetadata( $image, $context = false ) {
	return false;
}
/**
 * Sort formatted metadata into visible and collapsed fields.
 *
 * Split off from ImageHandler::formatMetadata, as it is used by more than
 * one type of handler, namely the media handlers that use the
 * FormatMetadata class.
 *
 * Each field name is lowercased; fields listed by visibleMetadataFields()
 * go into 'visible', all others into 'collapsed'.
 *
 * @param array $metadataArray Metadata array
 * @param bool|IContextSource $context Context to use (optional)
 * @return array Array for use displaying metadata.
 */
public function formatMetadataHelper( $metadataArray, $context = false ) {
	$table = [
		'visible' => [],
		'collapsed' => []
	];

	$formattedData = FormatMetadata::getFormattedData( $metadataArray, $context );
	$alwaysShown = $this->visibleMetadataFields();

	foreach ( $formattedData as $fieldName => $fieldValue ) {
		$tag = strtolower( $fieldName );
		$section = in_array( $tag, $alwaysShown ) ? 'visible' : 'collapsed';
		self::addMeta( $table, $section, 'exif', $tag, $fieldValue );
	}

	return $table;
}
/**
 * List the metadata items that stay displayed when the metadata table is
 * collapsed. Taken from FormatMetadata::getVisibleFields().
 *
 * @return array Array of strings
 */
protected function visibleMetadataFields() {
	return FormatMetadata::getVisibleFields();
}
/**
 * Append one metadata value to the array from which the table of metadata
 * values on the image page is generated.
 *
 * The display name is the text of the message "$type-$id" if that message
 * exists. Otherwise the wikitext-escaped $id is used and a debug line is
 * written.
 *
 * @param array &$array An array containing elements for each type of visibility
 *   and each of those elements being an array of metadata items. This function adds
 *   a value to that array.
 * @param string $visibility ('visible' or 'collapsed') if this value is hidden
 *   by default.
 * @param string $type Type of metadata tag (currently always 'exif')
 * @param string $id The name of the metadata tag (like 'artist' for example).
 *   Its name in the table displayed is the message "$type-$id" (Ex exif-artist ).
 * @param string $value Thingy goes into a wikitext table; it used to be escaped but
 *   that was incompatible with previous practise of customized display
 *   with wikitext formatting via messages such as 'exif-model-value'.
 *   So the escaping is taken back out, but generally this seems a confusing
 *   interface.
 * @param bool|string $param Value to pass to the message for the name of the field
 *   as $1. Currently this parameter doesn't seem to ever be used.
 *
 * Note, everything here is passed through the parser later on (!)
 */
protected static function addMeta( &$array, $visibility, $type, $id, $value, $param = false ) {
	$messageKey = "$type-$id";
	$label = wfMessage( $messageKey, $param );

	if ( !$label->exists() ) {
		// This is for future compatibility when using instant commons.
		// So as to not display as ugly a name if a new metadata
		// property is defined that we don't know about
		// (not a major issue since such a property would be collapsed
		// by default).
		wfDebug( __METHOD__ . ' Unknown metadata name: ' . $id . "\n" );
		$name = wfEscapeWikiText( $id );
	} else {
		$name = $label->text();
	}

	$array[$visibility][] = [
		'id' => $messageKey,
		'name' => $name,
		'value' => $value
	];
}
/**
 * Short description. Shown on Special:Search results.
 * This base implementation uses getGeneralShortDesc().
 *
 * @param File $file
 * @return string
 */
public function getShortDesc( $file ) {
	return self::getGeneralShortDesc( $file );
}
/**
 * Long description. Shown under the image on the image description page,
 * surrounded by (). This base implementation uses getGeneralLongDesc().
 *
 * @param File $file
 * @return string
 */
public function getLongDesc( $file ) {
	return self::getGeneralLongDesc( $file );
}
/**
 * Used instead of getShortDesc if there is no handler registered for file.
 *
 * Formats the file size with $wgLang and HTML-escapes the result.
 *
 * @param File $file
 * @return string
 */
public static function getGeneralShortDesc( $file ) {
	global $wgLang;

	$formattedSize = $wgLang->formatSize( $file->getSize() );

	return htmlspecialchars( $formattedSize );
}
/**
 * Used instead of getLongDesc if there is no handler registered for file.
 *
 * Parses the 'file-info' message with the file size as a size parameter
 * and the MIME type wrapped in a span with class "mime-type".
 *
 * @param File $file
 * @return string
 */
public static function getGeneralLongDesc( $file ) {
	$message = wfMessage( 'file-info' )->sizeParams( $file->getSize() );
	$mimeSpan = '<span class="mime-type">' . $file->getMimeType() . '</span>';

	return $message->params( $mimeSpan )->parse();
}
/**
 * Calculate the largest thumbnail width for a given original file size
 * such that the thumbnail's height is at most $maxHeight.
 *
 * The ideal width is $boxWidth * $maxHeight / $boxHeight. It is rounded up
 * unless the height that follows from the rounded-up width would itself
 * round to more than $maxHeight, in which case it is rounded down.
 *
 * ceil() and floor() return floats, so the result is cast to match the
 * documented integer return type.
 *
 * @param int $boxWidth Width of the thumbnail box.
 * @param int $boxHeight Height of the thumbnail box.
 * @param int $maxHeight Maximum height expected for the thumbnail.
 * @return int
 */
public static function fitBoxWidth( $boxWidth, $boxHeight, $maxHeight ) {
	$idealWidth = $boxWidth * $maxHeight / $boxHeight;
	$roundedUp = ceil( $idealWidth );

	// Rounding up may push the resulting height past the limit.
	if ( round( $roundedUp * $boxHeight / $boxWidth ) > $maxHeight ) {
		return (int)floor( $idealWidth );
	}

	return (int)$roundedUp;
}
/**
 * Dimensions text shown in the file history box on the image description
 * page. This base implementation returns an empty string.
 *
 * @param File $file
 * @return string Dimensions
 */
public function getDimensionsString( $file ) {
	return '';
}
/**
 * Modify the parser object post-transform.
 *
 * This is often used to do $parser->addOutputHook(),
 * in order to add some javascript to render a viewer.
 * See TimedMediaHandler or OggHandler for an example.
 *
 * This base implementation does nothing.
 *
 * @param Parser $parser
 * @param File $file
 */
public function parserTransformHook( $parser, $file ) {
	// Intentionally empty
}
/**
 * File validation hook called on upload.
 *
 * If the file at the given local path is not valid, or its MIME type does
 * not match the handler class, a Status object containing the relevant
 * errors should be returned. This base implementation accepts every file.
 *
 * @param string $fileName The local path to the file.
 * @return Status
 */
public function verifyUpload( $fileName ) {
	return Status::newGood();
}
/**
 * Check for zero-sized thumbnails. These can be generated when
 * no disk space is available or some other error occurs.
 *
 * An existing file is deleted when it is empty or when $retval is non-zero.
 * The outcome of the unlink() call is written to the 'thumbnail' debug log.
 *
 * @param string $dstPath The location of the suspect file
 * @param int $retval Return value of some shell process, file will be deleted if this is non-zero
 * @return bool True if removal was attempted (even when unlink() failed),
 *   false if the file does not exist or was left alone
 */
public function removeBadFile( $dstPath, $retval = 0 ) {
	if ( !file_exists( $dstPath ) ) {
		return false;
	}

	$thumbstat = stat( $dstPath );
	if ( $thumbstat['size'] != 0 && $retval == 0 ) {
		// Non-empty file from a successful process: keep it.
		return false;
	}

	if ( unlink( $dstPath ) ) {
		wfDebugLog( 'thumbnail',
			sprintf( 'Removing bad %d-byte thumbnail "%s". unlink() succeeded',
				$thumbstat['size'], $dstPath ) );
	} else {
		wfDebugLog( 'thumbnail',
			sprintf( 'Removing bad %d-byte thumbnail "%s". unlink() failed',
				$thumbstat['size'], $dstPath ) );
	}

	return true;
}
/**
 * Remove files from the purge list.
 *
 * This is used by some video handlers to prevent ?action=purge
 * from removing a transcoded video, which is expensive to
 * regenerate. This base implementation leaves the list untouched.
 *
 * @see LocalFile::purgeThumbnails
 *
 * @param array &$files
 * @param array $options Purge options. Currently will always be
 *   an array with a single key 'forThumbRefresh' set to true.
 */
public function filterThumbnailPurgeList( &$files, $options ) {
	// Nothing to filter by default
}
/**
 * Whether the handler can rotate the media.
 * This base implementation answers false.
 *
 * @since 1.24 non-static. From 1.21-1.23 was static
 * @return bool
 */
public function canRotate() {
	return false;
}
/**
 * On supporting image formats, try to read out the low-level orientation
 * of the file and return the angle that the file needs to be rotated to
 * be viewed.
 *
 * This information is only useful when manipulating the original file;
 * the width and height we normally work with is logical, and will match
 * any produced output views.
 *
 * For files we don't know, 0 is returned; this base implementation always
 * returns 0.
 *
 * @param File $file
 * @return int 0, 90, 180 or 270
 */
public function getRotation( $file ) {
	return 0;
}
/**
 * Log an error that occurred in an external process.
 *
 * The entry is written to the 'thumbnail' debug log and includes the
 * result of wfHostname().
 *
 * Moved from BitmapHandler to MediaHandler with MediaWiki 1.23
 *
 * @since 1.23
 * @param int $retval
 * @param string $err Error reported by command. Anything longer than
 *   MediaHandler::MAX_ERR_LOG_SIZE is stripped off.
 * @param string $cmd
 */
protected function logErrorForExternalProcess( $retval, $err, $cmd ) {
	# Keep error output limited (bug 57985)
	$truncated = substr( $err, 0, self::MAX_ERR_LOG_SIZE );
	$errMessage = trim( $truncated );

	$logLine = sprintf( 'thumbnail failed on %s: error %d "%s" from "%s"',
		wfHostname(), $retval, $errMessage, $cmd );
	wfDebugLog( 'thumbnail', $logLine );
}
/**
 * Get the list of languages the file can be viewed in.
 * This base implementation returns an empty list.
 *
 * @param File $file
 * @return string[] Array of language codes, or empty array if unsupported.
 * @since 1.23
 */
public function getAvailableLanguages( File $file ) {
	return [];
}
/**
 * On file types that support renderings in multiple languages,
 * which language is used by default if unspecified.
 *
 * If getAvailableLanguages returns a non-empty array, this must return
 * a valid language code. Otherwise it can return null if files of this
 * type do not support alternative language renderings, as this base
 * implementation does.
 *
 * @param File $file
 * @return string|null Language code or null if multi-language not supported for filetype.
 * @since 1.23
 */
public function getDefaultRenderLanguage( File $file ) {
	return null;
}
/**
 * If it's an audio file, return the length of the file. Otherwise 0.
 * This base implementation returns 0.0.
 *
 * File::getLength() existed for a long time, but was calling a method
 * that only existed in some subclasses of this class (The TMH ones).
 *
 * @param File $file
 * @return float Length in seconds
 * @since 1.23
 */
public function getLength( $file ) {
	return 0.0;
}
/**
 * Whether creating thumbnails from the file is large or otherwise
 * resource-intensive. This base implementation answers false.
 *
 * @param File $file
 * @return bool
 */
public function isExpensiveToThumbnail( $file ) {
	return false;
}
/**
 * Whether this handler supports the chained generation of thumbnails
 * according to buckets. This base implementation answers false.
 *
 * @return bool
 * @since 1.24
 */
public function supportsBucketing() {
	return false;
}
/**
 * Returns a normalised params array for which parameters have been cleaned
 * up for bucketing purposes. This base implementation hands the array back
 * unchanged.
 *
 * @param array $params
 * @return array
 */
public function sanitizeParamsForBucketing( $params ) {
	return $params;
}
/**
 * Gets configuration for the file warning message. The return value has
 * the following structure:
 *
 * [
 *     // Required, module with messages loaded for the client
 *     'module' => 'example.filewarning.messages',
 *     // Required, array of names of messages
 *     'messages' => [
 *         // Required, main warning message
 *         'main' => 'example-filewarning-main',
 *         // Optional, header for warning dialog
 *         'header' => 'example-filewarning-header',
 *         // Optional, footer for warning dialog
 *         'footer' => 'example-filewarning-footer',
 *         // Optional, text for more-information link (see below)
 *         'info' => 'example-filewarning-info',
 *     ],
 *     // Optional, link for more information
 *     'link' => 'http://example.com',
 * ]
 *
 * Returns null if no warning is necessary; this base implementation always
 * returns null.
 *
 * @param File $file
 * @return array|null
 */
public function getWarningConfig( $file ) {
	return null;
}