Merge ".mailmap: Correct two contributor names"
[mediawiki.git] / includes / media / WebPHandler.php
blob1e6e0cbd22a7bd13c8c0004a9770388e20ba1010
1 <?php
2 /**
3 * Handler for Google's WebP format <https://developers.google.com/speed/webp/>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
21 * @ingroup Media
24 use MediaWiki\Logger\LoggerFactory;
25 use MediaWiki\MediaWikiServices;
26 use Wikimedia\XMPReader\Reader as XMPReader;
28 /**
29 * Handler for Google's WebP format <https://developers.google.com/speed/webp/>
31 * @ingroup Media
33 class WebPHandler extends BitmapHandler {
/**
 * Marker stored in img_metadata when metadata extraction failed
 */
private const BROKEN_FILE = '0';

/**
 * Smallest number of header bytes that lets every chunk header type be decoded
 */
private const MINIMUM_CHUNK_HEADER_LENGTH = 18;

/**
 * Largest metadata chunk, in bytes, that will be extracted
 */
private const MAX_METADATA_CHUNK_SIZE = 2 * 1024 * 1024;

/**
 * Version number of the metadata layout stored in db records
 */
private const _MW_WEBP_VERSION = 2;

// Bit masks for the VP8X flags byte. Within this class only VP8X_ANIM and
// VP8X_ALPHA are tested (see decodeExtendedChunkHeader).
private const VP8X_ICC = 0x20;
private const VP8X_ALPHA = 0x10;
private const VP8X_EXIF = 0x08;
private const VP8X_XMP = 0x04;
private const VP8X_ANIM = 0x02;
/**
 * Build the size/metadata record for a WebP file.
 *
 * @param mixed $state Not used by this method
 * @param string $filename Path of the file to inspect
 * @return array Either [ 'metadata' => [ '_error' => ... ] ] when extraction
 *  failed, or an array with 'width', 'height' and 'metadata' entries
 */
public function getSizeAndMetadata( $state, $filename ) {
	$parsed = self::extractMetadata( $filename );
	if ( !$parsed ) {
		// Extraction failed: store the broken-file marker instead of dimensions
		return [ 'metadata' => [ '_error' => self::BROKEN_FILE ] ];
	}

	// Stamp the current format version before the array is embedded below
	$parsed['metadata']['_MW_WEBP_VERSION'] = self::_MW_WEBP_VERSION;

	return [
		'width' => $parsed['width'],
		'height' => $parsed['height'],
		'metadata' => $parsed,
	];
}
/**
 * Identify the metadata format produced by this handler.
 *
 * @param File $image Not used; the answer is the same for every file
 * @return string Always 'parsed-webp'
 */
public function getMetadataType( $image ) {
	return 'parsed-webp';
}
/**
 * Decide whether the metadata stored for a file can still be used.
 *
 * @param File $image File whose stored metadata array is checked
 * @return mixed One of self::METADATA_GOOD, self::METADATA_COMPATIBLE or
 *  self::METADATA_BAD
 */
public function isFileMetadataValid( $image ) {
	$data = $image->getMetadataArray();
	if ( $data === [ '_error' => self::BROKEN_FILE ] ) {
		// Do not repetitively regenerate metadata on a broken file.
		return self::METADATA_GOOD;
	}

	// Successfully parsed metadata (see getSizeAndMetadata) never contains an
	// '_error' key, so its presence in any shape other than the marker handled
	// above means the record is unusable. The previous check was inverted and
	// reported every valid record as bad.
	if ( !$data || isset( $data['_error'] ) ) {
		wfDebug( __METHOD__ . " invalid WebP metadata" );

		return self::METADATA_BAD;
	}

	// Loose comparison on purpose: tolerate the stored version coming back
	// as a numeric string.
	if ( !isset( $data['metadata']['_MW_WEBP_VERSION'] )
		|| $data['metadata']['_MW_WEBP_VERSION'] != self::_MW_WEBP_VERSION
	) {
		wfDebug( __METHOD__ . " old but compatible WebP metadata" );

		return self::METADATA_COMPATIBLE;
	}

	return self::METADATA_GOOD;
}
/**
 * Read the image size and WebP type of a file.
 *
 * @param string $filename
 * @return array|false The non-empty array built by extractMetadataFromChunks(),
 *  or false if the file is not a RIFF file, its fourCC is not 'WEBP', or no
 *  metadata could be extracted from its chunks.
 */
public static function extractMetadata( $filename ) {
	wfDebugLog( 'WebP', __METHOD__ . ": Extracting metadata from $filename" );

	// The second argument (100) is passed through to RiffExtractor unchanged;
	// presumably a limit on the number of chunks scanned - TODO confirm.
	$riff = RiffExtractor::findChunksFromFile( $filename, 100 );
	if ( $riff === false ) {
		wfDebugLog( 'WebP', __METHOD__ . ": Not a valid RIFF file" );
		return false;
	}

	$containerType = $riff['fourCC'];
	if ( $containerType !== 'WEBP' ) {
		wfDebugLog( 'WebP', __METHOD__ . ': FourCC was not WEBP: ' .
			bin2hex( $containerType ) );
		return false;
	}

	$parsed = self::extractMetadataFromChunks( $riff['chunks'], $filename );
	if ( $parsed ) {
		return $parsed;
	}

	wfDebugLog( 'WebP', __METHOD__ . ": No VP8 chunks found" );
	return false;
}
/**
 * Extracts the image size and WebP type from a file based on the chunk list
 *
 * Header data from VP8, VP8L and VP8X chunks is merged in file order, so a
 * later chunk overrides keys set by an earlier one. Exif and XMP payloads are
 * taken from the first chunk of each kind that yields data.
 *
 * @param array[] $chunks Chunks as extracted by RiffExtractor; each entry is
 *  read for its 'fourCC', 'start' and 'size' keys
 * @param string $filename
 * @return array Merged header data ('compression', 'width', 'height' and, for
 *  VP8X, 'animated' and 'transparency') plus 'media-metadata' when Exif or XMP
 *  data was found. Empty if no chunk could be decoded.
 */
public static function extractMetadataFromChunks( $chunks, $filename ) {
	$vp8Info = [];
	$exifData = null;
	$xmpData = null;

	foreach ( $chunks as $chunk ) {
		$fourCC = $chunk['fourCC'];
		// Note, spec says it should be 'XMP ' but some real life files use "XMP\0"
		if ( !in_array( $fourCC, [ 'VP8 ', 'VP8L', 'VP8X', 'EXIF', 'XMP ', "XMP\0" ], true ) ) {
			// Not a chunk containing interesting metadata
			continue;
		}
		wfDebugLog( 'WebP', __METHOD__ . ": {$fourCC}" );

		if ( $fourCC === 'EXIF' ) {
			// Spec says ignore all but first one
			$exifData ??= self::extractChunk( $chunk, $filename );
			continue;
		}
		if ( $fourCC === 'XMP ' || $fourCC === "XMP\0" ) {
			$xmpData ??= self::extractChunk( $chunk, $filename );
			continue;
		}

		// Only the VP8* chunks need their header bytes, so read them here
		// rather than for every chunk.
		$chunkHeader = file_get_contents( $filename, false, null,
			$chunk['start'], self::MINIMUM_CHUNK_HEADER_LENGTH );
		if ( $chunkHeader === false ) {
			// Do not hand a failed read to the decoders
			wfDebugLog( 'WebP', __METHOD__ . ": Could not read {$fourCC} chunk header" );
			continue;
		}

		switch ( $fourCC ) {
			case 'VP8 ':
				$decoded = self::decodeLossyChunkHeader( $chunkHeader );
				break;
			case 'VP8L':
				$decoded = self::decodeLosslessChunkHeader( $chunkHeader );
				break;
			default:
				// 'VP8X'; keep looking at later chunks to improve the metadata
				$decoded = self::decodeExtendedChunkHeader( $chunkHeader );
				break;
		}
		$vp8Info = array_merge( $vp8Info, $decoded );
	}

	return array_merge( $vp8Info,
		self::decodeMediaMetadata( $exifData, $xmpData, $filename ) );
}
/**
 * Decode metadata about the file (XMP & Exif).
 *
 * @param string|null $exifData Binary exif data from file
 * @param string|null $xmpData XMP data from file
 * @param string|null $filename (Used for logging only)
 * @return array Empty when both inputs are null, otherwise
 *  [ 'media-metadata' => ... ] with whatever BitmapMetadataHandler collected
 */
private static function decodeMediaMetadata( $exifData, $xmpData, $filename ) {
	if ( $exifData === null && $xmpData === null ) {
		// Nothing to do
		return [];
	}

	$bitmapMetadataHandler = new BitmapMetadataHandler;

	if ( $xmpData && XMPReader::isSupported() ) {
		$xmpReader = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
		$xmpReader->parse( $xmpData );
		foreach ( $xmpReader->getResults() as $type => $array ) {
			$bitmapMetadataHandler->addMetadata( $array, $type );
		}
	}

	if ( $exifData ) {
		// The Exif section of a webp file is basically a tiff file without an image.
		// Some files start with an Exif\0\0. This is wrong according to standard and
		// will prevent us from reading file, so remove for compatibility.
		if ( substr( $exifData, 0, 6 ) === "Exif\x00\x00" ) {
			$exifData = substr( $exifData, 6 );
		}

		// The Exif reader works on a file path, so spill the data to a temp file.
		$tmpFile = MediaWikiServices::getInstance()
			->getTempFSFileFactory()
			->newTempFSFile( 'webp-exif_', 'tiff' );

		// Temp file creation and the write can both fail; in that case skip
		// Exif instead of dereferencing null or parsing a missing file.
		$exifDataFile = $tmpFile ? $tmpFile->getPath() : null;
		if ( $exifDataFile === null || file_put_contents( $exifDataFile, $exifData ) === false ) {
			wfDebugLog( 'WebP', __METHOD__ . ': Could not write Exif data to a temporary file' );
		} else {
			$byteOrder = BitmapMetadataHandler::getTiffByteOrder( $exifDataFile );
			$bitmapMetadataHandler->getExif( $exifDataFile, $byteOrder );
		}
	}

	return [ 'media-metadata' => $bitmapMetadataHandler->getMetadataArray() ];
}
/**
 * Read the payload of a single chunk from the file.
 *
 * @param array $chunk Information about chunk; 'start' and 'size' are used
 * @param string $filename
 * @return null|string Contents of chunk (excluding fourcc, size and padding),
 *  or null if the chunk is empty, larger than MAX_METADATA_CHUNK_SIZE, or
 *  could not be read
 */
private static function extractChunk( $chunk, $filename ) {
	if ( $chunk['size'] > self::MAX_METADATA_CHUNK_SIZE || $chunk['size'] < 1 ) {
		return null;
	}

	// Skip first 8 bytes as that is the fourCC header followed by size of chunk.
	$contents = file_get_contents( $filename, false, null, $chunk['start'] + 8, $chunk['size'] );

	// Normalise a failed read to null so the documented contract holds and a
	// caller using ??= can still fall back to a later chunk.
	return $contents === false ? null : $contents;
}
/**
 * Decodes a lossy chunk header
 *
 * @param string $header First few bytes of the header, expected to be at least 18 bytes long
 * @return array Empty if the header is too short or the sync code is wrong,
 *  otherwise 'compression' => 'lossy' plus 'width' and 'height'
 */
protected static function decodeLossyChunkHeader( $header ) {
	// Bytes 14-17 are the last ones read below, so a shorter header (or a
	// failed read passed in as false) cannot be decoded.
	if ( !is_string( $header ) || strlen( $header ) < 18 ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Header too short' );
		return [];
	}

	// Bytes 0-3 are 'VP8 '
	// Bytes 4-7 are the VP8 stream size
	// Bytes 8-10 are the frame tag
	// Bytes 11-13 are 0x9D 0x01 0x2A called the sync code
	$syncCode = substr( $header, 11, 3 );
	if ( $syncCode !== "\x9D\x01\x2A" ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Invalid sync code: ' .
			bin2hex( $syncCode ) );
		return [];
	}

	// Bytes 14-17 are image size: two little-endian 16 bit values
	$imageSize = unpack( 'v2', substr( $header, 14, 4 ) );

	// Image sizes are 14 bit, 2 MSB are scaling parameters which are ignored here
	return [
		'compression' => 'lossy',
		'width' => $imageSize[1] & 0x3FFF,
		'height' => $imageSize[2] & 0x3FFF
	];
}
/**
 * Decodes a lossless chunk header
 *
 * @param string $header First few bytes of the header, expected to be at least 13 bytes long
 * @return array Empty if the header is too short or the signature is wrong,
 *  otherwise 'compression' => 'lossless' plus 'width' and 'height'
 */
public static function decodeLosslessChunkHeader( $header ) {
	// Bytes 9-12 are the last ones read below, so a shorter header (or a
	// failed read passed in as false) cannot be decoded.
	if ( !is_string( $header ) || strlen( $header ) < 13 ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Header too short' );
		return [];
	}

	// Bytes 0-3 are 'VP8L'
	// Bytes 4-7 are chunk stream size
	// Byte 8 is 0x2F called the signature
	if ( $header[8] !== "\x2F" ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Invalid signature: ' .
			bin2hex( $header[8] ) );
		return [];
	}

	// Bytes 9-12 contain the image size as a little-endian bit field:
	// bits 0-13 are width-1, bits 14-27 are height-1.
	$imageSize = unpack( 'C4', substr( $header, 9, 4 ) );

	// Width: all of byte 1 plus the low 6 bits of byte 2.
	$width = $imageSize[1] | ( ( $imageSize[2] & 0x3F ) << 8 );
	// Height: top 2 bits of byte 2, all of byte 3 and the low 4 bits of byte 4
	// (2 + 8 + 4 = 14 bits). Masking byte 4 with 0x03 would drop the two
	// highest bits and break heights above 4096.
	$height = ( ( $imageSize[2] & 0xC0 ) >> 6 ) |
		( $imageSize[3] << 2 ) |
		( ( $imageSize[4] & 0x0F ) << 10 );

	return [
		'compression' => 'lossless',
		'width' => $width + 1,
		'height' => $height + 1
	];
}
/**
 * Decodes an extended chunk header
 *
 * @param string $header First few bytes of the header, expected to be at least 18 bytes long
 * @return array Empty if the header is too short, otherwise
 *  'compression' => 'unknown', boolean 'animated' and 'transparency' flags,
 *  and 'width' and 'height'
 */
public static function decodeExtendedChunkHeader( $header ) {
	// Bytes 15-17 are the last ones read below, so a shorter header (or a
	// failed read passed in as false) cannot be decoded.
	if ( !is_string( $header ) || strlen( $header ) < 18 ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Header too short' );
		return [];
	}

	// Bytes 0-3 are 'VP8X'
	// Bytes 4-7 are chunk length
	// Byte 8 holds the flags tested here; bytes 9-11 are not inspected
	$flags = ord( $header[8] );

	// Bytes 12-14 are width-1 and bytes 15-17 are height-1, each a 24 bit
	// little-endian value; pad to 4 bytes so it can be unpacked as 'V'.
	$width = unpack( 'V', substr( $header, 12, 3 ) . "\x00" );
	$height = unpack( 'V', substr( $header, 15, 3 ) . "\x00" );

	return [
		'compression' => 'unknown',
		'animated' => ( $flags & self::VP8X_ANIM ) === self::VP8X_ANIM,
		'transparency' => ( $flags & self::VP8X_ALPHA ) === self::VP8X_ALPHA,
		'width' => ( $width[1] & 0xFFFFFF ) + 1,
		'height' => ( $height[1] & 0xFFFFFF ) + 1
	];
}
/**
 * Whether files of this type always have to be rendered.
 *
 * @param File $file Not used; the answer is the same for every file
 * @return bool True, not all browsers support WebP
 */
public function mustRender( $file ) {
	return true;
}
/**
 * Whether this handler is able to render the given file.
 *
 * @param File $file
 * @return bool False if we are unable to render this image, which is the
 *  case exactly when isAnimatedImage() reports it as animated
 */
public function canRender( $file ) {
	return !$this->isAnimatedImage( $file );
}
/**
 * Whether the stored metadata marks the file as animated.
 *
 * @param File $image
 * @return bool True only if the metadata array has an 'animated' entry that
 *  is exactly true
 */
public function isAnimatedImage( $image ) {
	$metadata = $image->getMetadataArray();

	// A missing entry and any value other than boolean true both count as
	// "not animated".
	return ( $metadata['animated'] ?? null ) === true;
}
/**
 * Whether thumbnails of the file may be animated.
 *
 * @param File $file Not used; the answer is the same for every file
 * @return bool Always false
 */
public function canAnimateThumbnail( $file ) {
	return false;
}
/**
 * Render files as PNG
 *
 * None of the arguments influence the result.
 *
 * @param string $ext
 * @param string $mime
 * @param array|null $params
 * @return array Two entries: the extension 'png' and the MIME type 'image/png'
 */
public function getThumbType( $ext, $mime, $params = null ) {
	return [ 'png', 'image/png' ];
}
/**
 * Whether the GD extension is present and reports WebP support.
 *
 * @return bool
 */
protected function hasGDSupport() {
	if ( !function_exists( 'gd_info' ) ) {
		return false;
	}

	// Older GD builds may not report the key at all; treat that as unsupported
	return (bool)( gd_info()['WebP Support'] ?? false );
}
/**
 * Fetch the 'media-metadata' part of a file's stored metadata.
 *
 * @param File $image
 * @return array The 'media-metadata' entry, or an empty array if it is absent
 */
public function getCommonMetaArray( File $image ) {
	return $image->getMetadataArray()['media-metadata'] ?? [];
}
/**
 * Format the file's common metadata for display.
 *
 * @param File $image
 * @param mixed $context Passed through to formatMetadataHelper(); defaults to false
 * @return mixed False if there is no common metadata, otherwise whatever
 *  formatMetadataHelper() returns
 */
public function formatMetadata( $image, $context = false ) {
	$meta = $this->getCommonMetaArray( $image );

	return $meta ? $this->formatMetadataHelper( $meta, $context ) : false;
}