Merge "docs: Fix typo"
[mediawiki.git] / includes / media / PNGMetadataExtractor.php
blob2709f1a6c54d149e2e2aa759398ce8d0d3bce65f
1 <?php
2 /**
3 * PNG frame counter and metadata extractor.
5 * Slightly derived from GIFMetadataExtractor.php
6 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
7 * redistribution.
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 * http://www.gnu.org/copyleft/gpl.html
24 * @file
25 * @ingroup Media
28 use Wikimedia\AtEase\AtEase;
30 /**
31 * PNG frame counter.
33 * @ingroup Media
35 class PNGMetadataExtractor {
36 /** @var string The 8-byte PNG file signature; assigned at the start of getMetadata() */
37 private static $pngSig;
39 /** @var int Length in bytes of the CRC field read after each chunk's data; set to 4 in getMetadata() */
40 private static $crcSize;
42 /** @var array Map of lower-cased PNG text keywords to the metadata field names used in the result; filled in getMetadata() */
43 private static $textChunks;
45 public const VERSION = 1;
46 private const MAX_CHUNK_SIZE = 3_145_728; // 3 mebibytes; chunks declaring a larger size are skipped by getMetadata()
/**
 * Extract dimensions, animation information and textual metadata from a PNG file.
 *
 * The file is walked chunk by chunk. Chunks that declare a size above
 * MAX_CHUNK_SIZE, or whose CRC does not match, are skipped. Parsing stops at
 * the IEND chunk.
 *
 * @param string $filename Path of the PNG file to read.
 * @return array Associative array with the keys 'width', 'height', 'frameCount',
 *  'loopCount', 'duration', 'text', 'bitDepth' and 'colorType'.
 * @throws InvalidArgumentException If no file name is given, the path does not exist
 *  or is a directory, the file cannot be opened, it does not start with the PNG
 *  signature, a seek fails, or the file ends before IEND is reached.
 */
public static function getMetadata( $filename ) {
	self::$pngSig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
	self::$crcSize = 4;
	/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
	 * and https://www.w3.org/TR/PNG/#11keywords
	 */
	self::$textChunks = [
		'xml:com.adobe.xmp' => 'xmp',
		# Artist is unofficial. Author is the recommended
		# keyword in the PNG spec. However some people output
		# Artist so support both.
		'artist' => 'Artist',
		'model' => 'Model',
		'make' => 'Make',
		'author' => 'Artist',
		'comment' => 'PNGFileComment',
		'description' => 'ImageDescription',
		'title' => 'ObjectName',
		'copyright' => 'Copyright',
		# Source as in original device used to make image
		# not as in who gave you the image
		'source' => 'Model',
		'software' => 'Software',
		'disclaimer' => 'Disclaimer',
		'warning' => 'ContentWarning',
		'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
		'label' => 'Label',
		'creation time' => 'DateTimeDigitized',
		/* Other potentially useful things - Document */
	];

	$frameCount = 0;
	$loopCount = 1;
	$text = [];
	$duration = 0.0;
	$width = 0;
	$height = 0;
	$bitDepth = 0;
	$colorType = 'unknown';

	if ( !$filename ) {
		throw new InvalidArgumentException( __METHOD__ . ": No file name specified" );
	}

	if ( !file_exists( $filename ) || is_dir( $filename ) ) {
		throw new InvalidArgumentException( __METHOD__ . ": File $filename does not exist" );
	}

	$fh = fopen( $filename, 'rb' );

	if ( !$fh ) {
		throw new InvalidArgumentException( __METHOD__ . ": Unable to open file $filename" );
	}

	// Everything below may throw (bad signature, seek error, truncated file);
	// the finally block guarantees the handle is released on every path.
	try {
		// Check for the PNG header
		$buf = self::read( $fh, 8 );
		if ( $buf !== self::$pngSig ) {
			throw new InvalidArgumentException( __METHOD__ . ": Not a valid PNG file; header: $buf" );
		}

		// Read chunks
		while ( !feof( $fh ) ) {
			$buf = self::read( $fh, 4 );
			$chunk_size = unpack( "N", $buf )[1];

			if ( $chunk_size < 0 || $chunk_size > self::MAX_CHUNK_SIZE ) {
				wfDebug( __METHOD__ . ': Chunk size of ' . $chunk_size .
					' too big, skipping. Max size is: ' . self::MAX_CHUNK_SIZE );
				// Only the length has been consumed so far: step over the
				// 4-byte chunk type, the data and the trailing CRC.
				if ( fseek( $fh, 4 + $chunk_size + self::$crcSize, SEEK_CUR ) !== 0 ) {
					throw new InvalidArgumentException( __METHOD__ . ': seek error' );
				}
				continue;
			}

			$chunk_type = self::read( $fh, 4 );
			$buf = self::read( $fh, $chunk_size );
			$crc = self::read( $fh, self::$crcSize );
			// The CRC covers the chunk type and data, not the length field.
			$computed = crc32( $chunk_type . $buf );
			if ( pack( 'N', $computed ) !== $crc ) {
				wfDebug( __METHOD__ . ': chunk has invalid CRC, skipping' );
				continue;
			}

			if ( $chunk_type === "IHDR" ) {
				// Width (4 bytes), height (4), bit depth (1) and colour type (1)
				// are read below, so at least 10 bytes are required.
				if ( $chunk_size < 10 ) {
					wfDebug( __METHOD__ . ": IHDR chunk too small" );
					continue;
				}

				$width = unpack( 'N', substr( $buf, 0, 4 ) )[1];
				$height = unpack( 'N', substr( $buf, 4, 4 ) )[1];
				$bitDepth = ord( substr( $buf, 8, 1 ) );
				// Detect the color type in British English as per the spec
				// https://www.w3.org/TR/PNG/#11IHDR
				switch ( ord( substr( $buf, 9, 1 ) ) ) {
					case 0:
						$colorType = 'greyscale';
						break;
					case 2:
						$colorType = 'truecolour';
						break;
					case 3:
						$colorType = 'index-coloured';
						break;
					case 4:
						$colorType = 'greyscale-alpha';
						break;
					case 6:
						$colorType = 'truecolour-alpha';
						break;
					default:
						$colorType = 'unknown';
						break;
				}
			} elseif ( $chunk_type === "acTL" ) {
				// Two 32-bit fields (frame count, play count) are unpacked below.
				if ( $chunk_size < 8 ) {
					wfDebug( __METHOD__ . ": acTL chunk too small" );
					continue;
				}

				$actl = unpack( "Nframes/Nplays", $buf );
				$frameCount = $actl['frames'];
				$loopCount = $actl['plays'];
			} elseif ( $chunk_type === "fcTL" ) {
				// The two 16-bit delay fields are read from byte offset 20.
				$buf = substr( $buf, 20 );
				if ( strlen( $buf ) < 4 ) {
					wfDebug( __METHOD__ . ": fcTL chunk too small" );
					continue;
				}

				$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
				// A zero denominator is treated as 100.
				if ( $fctldur['delay_den'] === 0 ) {
					$fctldur['delay_den'] = 100;
				}
				if ( $fctldur['delay_num'] ) {
					$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
				}
			} elseif ( $chunk_type === "iTXt" ) {
				// Extracts iTXt chunks, uncompressing if necessary.
				$items = [];
				if ( preg_match(
					'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
					$buf, $items )
				) {
					/* $items[1] = text chunk name, $items[2] = compressed flag,
					 * $items[3] = lang code (or ""), $items[4]= compression type.
					 * $items[5] = content
					 *
					 * NOTE(review): the pattern matches a literal \x00 right after the
					 * compressed flag and before the lang code, so $items[4] looks like
					 * the byte following the lang code rather than the compression
					 * method byte - confirm against the iTXt layout.
					 */

					// Theoretically should be case-sensitive, but in practice...
					$items[1] = strtolower( $items[1] );
					if ( !isset( self::$textChunks[$items[1]] ) ) {
						// Only extract textual chunks on our list.
						continue;
					}

					$items[3] = strtolower( $items[3] );
					if ( $items[3] === '' ) {
						// if no lang specified use x-default like in xmp.
						$items[3] = 'x-default';
					}

					// if compressed
					if ( $items[2] === "\x01" ) {
						if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
							AtEase::suppressWarnings();
							$items[5] = gzuncompress( $items[5] );
							AtEase::restoreWarnings();

							if ( $items[5] === false ) {
								// decompression failed
								wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] );
								continue;
							}
						} else {
							wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
								. " or potentially invalid compression method" );
							continue;
						}
					}
					$finalKeyword = self::$textChunks[$items[1]];
					$text[$finalKeyword][$items[3]] = $items[5];
					$text[$finalKeyword]['_type'] = 'lang';
				} else {
					// Error reading iTXt chunk
					wfDebug( __METHOD__ . ": Invalid iTXt chunk" );
				}
			} elseif ( $chunk_type === 'tEXt' ) {
				// In case there is no \x00 which will make explode fail.
				if ( strpos( $buf, "\x00" ) === false ) {
					wfDebug( __METHOD__ . ": Invalid tEXt chunk: no null byte" );
					continue;
				}

				[ $keyword, $content ] = explode( "\x00", $buf, 2 );
				if ( $keyword === '' ) {
					wfDebug( __METHOD__ . ": Empty tEXt keyword" );
					continue;
				}

				// Theoretically should be case-sensitive, but in practice...
				$keyword = strtolower( $keyword );
				if ( !isset( self::$textChunks[$keyword] ) ) {
					// Don't recognize chunk, so skip.
					continue;
				}
				AtEase::suppressWarnings();
				$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
				AtEase::restoreWarnings();

				if ( $content === false ) {
					wfDebug( __METHOD__ . ": Read error (error with iconv)" );
					continue;
				}

				$finalKeyword = self::$textChunks[$keyword];
				$text[$finalKeyword]['x-default'] = $content;
				$text[$finalKeyword]['_type'] = 'lang';
			} elseif ( $chunk_type === 'zTXt' ) {
				if ( function_exists( 'gzuncompress' ) ) {
					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						wfDebug( __METHOD__ . ": No null byte in zTXt chunk" );
						continue;
					}

					[ $keyword, $postKeyword ] = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $postKeyword === '' ) {
						wfDebug( __METHOD__ . ": Empty zTXt chunk" );
						continue;
					}
					// Theoretically should be case-sensitive, but in practice...
					$keyword = strtolower( $keyword );

					if ( !isset( self::$textChunks[$keyword] ) ) {
						// Don't recognize chunk, so skip.
						continue;
					}
					// First byte after the keyword is the compression method,
					// the remainder is the compressed text.
					$compression = substr( $postKeyword, 0, 1 );
					$content = substr( $postKeyword, 1 );
					if ( $compression !== "\x00" ) {
						wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." );
						continue;
					}

					AtEase::suppressWarnings();
					$content = gzuncompress( $content );
					AtEase::restoreWarnings();

					if ( $content === false ) {
						// decompression failed
						wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword );
						continue;
					}

					AtEase::suppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					AtEase::restoreWarnings();

					if ( $content === false ) {
						wfDebug( __METHOD__ . ": iconv error in zTXt chunk" );
						continue;
					}

					$finalKeyword = self::$textChunks[$keyword];
					$text[$finalKeyword]['x-default'] = $content;
					$text[$finalKeyword]['_type'] = 'lang';
				} else {
					wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
				}
			} elseif ( $chunk_type === 'tIME' ) {
				// last mod timestamp.
				if ( $chunk_size !== 7 ) {
					wfDebug( __METHOD__ . ": tIME wrong size" );
					continue;
				}

				// Note: spec says this should be UTC.
				$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
				$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
					$t['y'], $t['m'], $t['d'], $t['h'],
					$t['min'], $t['s'] );

				$exifTime = wfTimestamp( TS_EXIF, $strTime );

				if ( $exifTime ) {
					$text['DateTime'] = $exifTime;
				}
			} elseif ( $chunk_type === 'pHYs' ) {
				// how big pixels are (dots per meter).
				if ( $chunk_size !== 9 ) {
					wfDebug( __METHOD__ . ": pHYs wrong size" );
					continue;
				}

				$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
				if ( $dim['unit'] === 1 ) {
					// Need to check for negative because php
					// doesn't deal with super-large unsigned 32-bit ints well
					if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
						// unit is meters
						// (as opposed to 0 = undefined )
						$text['XResolution'] = $dim['width']
							. '/100';
						$text['YResolution'] = $dim['height']
							. '/100';
						$text['ResolutionUnit'] = 3;
						// 3 = dots per cm (from Exif).
					}
				}
			} elseif ( $chunk_type === "IEND" ) {
				break;
			}
		}
	} finally {
		fclose( $fh );
	}

	if ( $loopCount > 1 ) {
		$duration *= $loopCount;
	}

	if ( isset( $text['DateTimeDigitized'] ) ) {
		// Convert date format from rfc2822 to exif.
		foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
			if ( $name === '_type' ) {
				continue;
			}

			// @todo FIXME: Currently timezones are ignored.
			// possibly should be wfTimestamp's
			// responsibility. (at least for numeric TZ)
			$formatted = wfTimestamp( TS_EXIF, $value );
			if ( $formatted ) {
				// Only change if we could convert the
				// date.
				// The png standard says it should be
				// in rfc2822 format, but not required.
				// In general for the exif stuff we
				// prettify the date if we can, but we
				// display as-is if we cannot or if
				// it is invalid.
				// So do the same here.

				$value = $formatted;
			}
		}
		// Break the reference so later writes to $value cannot alter $text.
		unset( $value );
	}

	return [
		'width' => $width,
		'height' => $height,
		'frameCount' => $frameCount,
		'loopCount' => $loopCount,
		'duration' => $duration,
		'text' => $text,
		'bitDepth' => $bitDepth,
		'colorType' => $colorType,
	];
}
/**
 * Read exactly $size bytes from an open file handle.
 *
 * @param resource $fh The file handle
 * @param int $size Number of bytes to read.
 * @throws InvalidArgumentException If fread() fails or fewer than $size bytes are available
 * @return string The bytes read; an empty string when $size is 0.
 */
private static function read( $fh, $size ) {
	// fread() is never invoked for a zero-length request.
	$data = $size === 0 ? '' : fread( $fh, $size );

	if ( $data === false ) {
		throw new InvalidArgumentException( __METHOD__ . ': read error' );
	}

	// A short read means the file ended in the middle of the requested span.
	if ( strlen( $data ) < $size ) {
		throw new InvalidArgumentException( __METHOD__ . ': unexpected end of file' );
	}

	return $data;
}