3 * PNG frame counter and metadata extractor.
4 * Slightly derived from GIFMetadataExtractor.php
5 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
17 class PNGMetadataExtractor
{
23 const MAX_CHUNK_SIZE
= 3145728; // 3 megabytes
25 static function getMetadata( $filename ) {
26 self
::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
28 /* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
29 * and http://www.w3.org/TR/PNG/#11keywords
31 self
::$text_chunks = array(
32 'xml:com.adobe.xmp' => 'xmp',
33 # Artist is unofficial. Author is the recommended
34 # keyword in the PNG spec. However some people output
35 # Artist so support both.
40 'comment' => 'PNGFileComment',
41 'description' => 'ImageDescription',
42 'title' => 'ObjectName',
43 'copyright' => 'Copyright',
44 # Source as in original device used to make image
45 # not as in who gave you the image
47 'software' => 'Software',
48 'disclaimer' => 'Disclaimer',
49 'warning' => 'ContentWarning',
50 'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
52 'creation time' => 'DateTimeDigitized',
53 /* Other potentially useful things - Document */
61 $colorType = 'unknown';
64 throw new Exception( __METHOD__
. ": No file name specified" );
65 } elseif ( !file_exists( $filename ) ||
is_dir( $filename ) ) {
66 throw new Exception( __METHOD__
. ": File $filename does not exist" );
69 $fh = fopen( $filename, 'rb' );
72 throw new Exception( __METHOD__
. ": Unable to open file $filename" );
75 // Check for the PNG header
76 $buf = fread( $fh, 8 );
77 if ( $buf != self
::$png_sig ) {
78 throw new Exception( __METHOD__
. ": Not a valid PNG file; header: $buf" );
82 while ( !feof( $fh ) ) {
83 $buf = fread( $fh, 4 );
84 if ( !$buf ||
strlen( $buf ) < 4 ) {
85 throw new Exception( __METHOD__
. ": Read error" );
87 $chunk_size = unpack( "N", $buf );
88 $chunk_size = $chunk_size[1];
90 if ( $chunk_size < 0 ) {
91 throw new Exception( __METHOD__
. ": Chunk size too big for unpack" );
94 $chunk_type = fread( $fh, 4 );
95 if ( !$chunk_type ||
strlen( $chunk_type ) < 4 ) {
96 throw new Exception( __METHOD__
. ": Read error" );
99 if ( $chunk_type == "IHDR" ) {
100 $buf = self
::read( $fh, $chunk_size );
101 if ( !$buf ||
strlen( $buf ) < $chunk_size ) {
102 throw new Exception( __METHOD__
. ": Read error" );
104 $bitDepth = ord( substr( $buf, 8, 1 ) );
105 // Detect the color type in British English as per the spec
106 // http://www.w3.org/TR/PNG/#11IHDR
107 switch ( ord( substr( $buf, 9, 1 ) ) ) {
109 $colorType = 'greyscale';
112 $colorType = 'truecolour';
115 $colorType = 'index-coloured';
118 $colorType = 'greyscale-alpha';
121 $colorType = 'truecolour-alpha';
124 $colorType = 'unknown';
127 } elseif ( $chunk_type == "acTL" ) {
128 $buf = fread( $fh, $chunk_size );
129 if( !$buf ||
strlen( $buf ) < $chunk_size ||
$chunk_size < 4 ) {
130 throw new Exception( __METHOD__
. ": Read error" );
133 $actl = unpack( "Nframes/Nplays", $buf );
134 $frameCount = $actl['frames'];
135 $loopCount = $actl['plays'];
136 } elseif ( $chunk_type == "fcTL" ) {
137 $buf = self
::read( $fh, $chunk_size );
138 if ( !$buf ||
strlen( $buf ) < $chunk_size ) {
139 throw new Exception( __METHOD__
. ": Read error" );
141 $buf = substr( $buf, 20 );
142 if ( strlen( $buf ) < 4 ) {
143 throw new Exception( __METHOD__
. ": Read error" );
146 $fctldur = unpack( "ndelay_num/ndelay_den", $buf );
147 if ( $fctldur['delay_den'] == 0 ) {
148 $fctldur['delay_den'] = 100;
150 if ( $fctldur['delay_num'] ) {
151 $duration +
= $fctldur['delay_num'] / $fctldur['delay_den'];
153 } elseif ( $chunk_type == "iTXt" ) {
154 // Extracts iTXt chunks, uncompressing if necessary.
155 $buf = self
::read( $fh, $chunk_size );
158 '/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
161 /* $items[1] = text chunk name, $items[2] = compressed flag,
162 * $items[3] = lang code (or ""), $items[4]= compression type.
163 * $items[5] = content
166 // Theoretically should be case-sensitive, but in practice...
167 $items[1] = strtolower( $items[1] );
168 if ( !isset( self
::$text_chunks[$items[1]] ) ) {
169 // Only extract textual chunks on our list.
170 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
174 $items[3] = strtolower( $items[3] );
175 if ( $items[3] == '' ) {
176 // if no lang specified use x-default like in xmp.
177 $items[3] = 'x-default';
181 if ( $items[2] == "\x01" ) {
182 if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
183 wfSuppressWarnings();
184 $items[5] = gzuncompress( $items[5] );
187 if ( $items[5] === false ) {
188 // decompression failed
189 wfDebug( __METHOD__
. ' Error decompressing iTxt chunk - ' . $items[1] );
190 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
195 wfDebug( __METHOD__
. ' Skipping compressed png iTXt chunk due to lack of zlib,'
196 . ' or potentially invalid compression method' );
197 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
201 $finalKeyword = self
::$text_chunks[ $items[1] ];
202 $text[ $finalKeyword ][ $items[3] ] = $items[5];
203 $text[ $finalKeyword ]['_type'] = 'lang';
206 // Error reading iTXt chunk
207 throw new Exception( __METHOD__
. ": Read error on iTXt chunk" );
210 } elseif ( $chunk_type == 'tEXt' ) {
211 $buf = self
::read( $fh, $chunk_size );
213 // In case there is no \x00 which will make explode fail.
214 if ( strpos( $buf, "\x00" ) === false ) {
215 throw new Exception( __METHOD__
. ": Read error on tEXt chunk" );
218 list( $keyword, $content ) = explode( "\x00", $buf, 2 );
219 if ( $keyword === '' ||
$content === '' ) {
220 throw new Exception( __METHOD__
. ": Read error on tEXt chunk" );
223 // Theoretically should be case-sensitive, but in practice...
224 $keyword = strtolower( $keyword );
225 if ( !isset( self
::$text_chunks[ $keyword ] ) ) {
226 // Don't recognize chunk, so skip.
227 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
230 wfSuppressWarnings();
231 $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
234 if ( $content === false ) {
235 throw new Exception( __METHOD__
. ": Read error (error with iconv)" );
238 $finalKeyword = self
::$text_chunks[ $keyword ];
239 $text[ $finalKeyword ][ 'x-default' ] = $content;
240 $text[ $finalKeyword ]['_type'] = 'lang';
242 } elseif ( $chunk_type == 'zTXt' ) {
243 if ( function_exists( 'gzuncompress' ) ) {
244 $buf = self
::read( $fh, $chunk_size );
246 // In case there is no \x00 which will make explode fail.
247 if ( strpos( $buf, "\x00" ) === false ) {
248 throw new Exception( __METHOD__
. ": Read error on zTXt chunk" );
251 list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
252 if ( $keyword === '' ||
$postKeyword === '' ) {
253 throw new Exception( __METHOD__
. ": Read error on zTXt chunk" );
255 // Theoretically should be case-sensitive, but in practice...
256 $keyword = strtolower( $keyword );
258 if ( !isset( self
::$text_chunks[ $keyword ] ) ) {
259 // Don't recognize chunk, so skip.
260 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
263 $compression = substr( $postKeyword, 0, 1 );
264 $content = substr( $postKeyword, 1 );
265 if ( $compression !== "\x00" ) {
266 wfDebug( __METHOD__
. " Unrecognized compression method in zTXt ($keyword). Skipping." );
267 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
271 wfSuppressWarnings();
272 $content = gzuncompress( $content );
275 if ( $content === false ) {
276 // decompression failed
277 wfDebug( __METHOD__
. ' Error decompressing zTXt chunk - ' . $keyword );
278 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
282 wfSuppressWarnings();
283 $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
286 if ( $content === false ) {
287 throw new Exception( __METHOD__
. ": Read error (error with iconv)" );
290 $finalKeyword = self
::$text_chunks[ $keyword ];
291 $text[ $finalKeyword ][ 'x-default' ] = $content;
292 $text[ $finalKeyword ]['_type'] = 'lang';
295 wfDebug( __METHOD__
. " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
296 fseek( $fh, $chunk_size, SEEK_CUR
);
298 } elseif ( $chunk_type == 'tIME' ) {
299 // last mod timestamp.
300 if ( $chunk_size !== 7 ) {
301 throw new Exception( __METHOD__
. ": tIME wrong size" );
303 $buf = self
::read( $fh, $chunk_size );
304 if ( !$buf ||
strlen( $buf ) < $chunk_size ) {
305 throw new Exception( __METHOD__
. ": Read error" );
308 // Note: spec says this should be UTC.
309 $t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
310 $strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
311 $t['y'], $t['m'], $t['d'], $t['h'],
312 $t['min'], $t['s'] );
314 $exifTime = wfTimestamp( TS_EXIF
, $strTime );
317 $text['DateTime'] = $exifTime;
320 } elseif ( $chunk_type == 'pHYs' ) {
321 // how big pixels are (dots per meter).
322 if ( $chunk_size !== 9 ) {
323 throw new Exception( __METHOD__
. ": pHYs wrong size" );
326 $buf = self
::read( $fh, $chunk_size );
327 if ( !$buf ||
strlen( $buf ) < $chunk_size ) {
328 throw new Exception( __METHOD__
. ": Read error" );
331 $dim = unpack( "Nwidth/Nheight/Cunit", $buf );
332 if ( $dim['unit'] == 1 ) {
333 // Need to check for negative because php
334 // doesn't deal with super-large unsigned 32-bit ints well
335 if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
337 // (as opposed to 0 = undefined )
338 $text['XResolution'] = $dim['width']
340 $text['YResolution'] = $dim['height']
342 $text['ResolutionUnit'] = 3;
343 // 3 = dots per cm (from Exif).
347 } elseif ( $chunk_type == "IEND" ) {
350 fseek( $fh, $chunk_size, SEEK_CUR
);
352 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
356 if ( $loopCount > 1 ) {
357 $duration *= $loopCount;
360 if ( isset( $text['DateTimeDigitized'] ) ) {
361 // Convert date format from rfc2822 to exif.
362 foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
363 if ( $name === '_type' ) {
367 // @todo FIXME: Currently timezones are ignored.
368 // possibly should be wfTimestamp's
369 // responsibility. (at least for numeric TZ)
370 $formatted = wfTimestamp( TS_EXIF
, $value );
372 // Only change if we could convert the
374 // The png standard says it should be
375 // in rfc2822 format, but not required.
376 // In general for the exif stuff we
377 // prettify the date if we can, but we
378 // display as-is if we cannot or if
380 // So do the same here.
387 'frameCount' => $frameCount,
388 'loopCount' => $loopCount,
389 'duration' => $duration,
391 'bitDepth' => $bitDepth,
392 'colorType' => $colorType,
397 * Read a chunk, checking to make sure it's not too big.
399 * @param $fh resource The file handle
400 * @param $size Integer size in bytes.
401 * @throws Exception if too big.
402 * @return String The chunk.
404 static private function read( $fh, $size ) {
405 if ( $size > self
::MAX_CHUNK_SIZE
) {
406 throw new Exception( __METHOD__
. ': Chunk size of ' . $size .
407 ' too big. Max size is: ' . self
::MAX_CHUNK_SIZE
);
409 return fread( $fh, $size );