3 * PNG frame counter and metadata extractor.
4 * Slightly derived from GIFMetadataExtractor.php
5 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
17 class PNGMetadataExtractor
{
23 const MAX_CHUNK_SIZE
= 3145728; // 3 megabytes
25 static function getMetadata( $filename ) {
26 self
::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
28 /* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
29 * and http://www.w3.org/TR/PNG/#11keywords
31 self
::$text_chunks = array(
32 'xml:com.adobe.xmp' => 'xmp',
33 # Artist is unofficial. Author is the recommended
34 # keyword in the PNG spec. However some people output
35 # Artist so support both.
40 'comment' => 'PNGFileComment',
41 'description' => 'ImageDescription',
42 'title' => 'ObjectName',
43 'copyright' => 'Copyright',
44 # Source as in original device used to make image
45 # not as in who gave you the image
47 'software' => 'Software',
48 'disclaimer' => 'Disclaimer',
49 'warning' => 'ContentWarning',
50 'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
52 'creation time' => 'DateTimeDigitized',
53 /* Other potentially useful things - Document */
61 $colorType = 'unknown';
64 throw new Exception( __METHOD__
. ": No file name specified" );
65 } elseif ( !file_exists( $filename ) ||
is_dir( $filename ) ) {
66 throw new Exception( __METHOD__
. ": File $filename does not exist" );
69 $fh = fopen( $filename, 'r' );
72 throw new Exception( __METHOD__
. ": Unable to open file $filename" );
75 // Check for the PNG header
76 $buf = fread( $fh, 8 );
77 if ( $buf != self
::$png_sig ) {
78 throw new Exception( __METHOD__
. ": Not a valid PNG file; header: $buf" );
82 while ( !feof( $fh ) ) {
83 $buf = fread( $fh, 4 );
85 throw new Exception( __METHOD__
. ": Read error" );
87 $chunk_size = unpack( "N", $buf );
88 $chunk_size = $chunk_size[1];
90 $chunk_type = fread( $fh, 4 );
92 throw new Exception( __METHOD__
. ": Read error" );
95 if ( $chunk_type == "IHDR" ) {
96 $buf = self
::read( $fh, $chunk_size );
98 throw new Exception( __METHOD__
. ": Read error" );
100 $bitDepth = ord( substr( $buf, 8, 1 ) );
101 // Detect the color type in British English as per the spec
102 // http://www.w3.org/TR/PNG/#11IHDR
103 switch ( ord( substr( $buf, 9, 1 ) ) ) {
105 $colorType = 'greyscale';
108 $colorType = 'truecolour';
111 $colorType = 'index-coloured';
114 $colorType = 'greyscale-alpha';
117 $colorType = 'truecolour-alpha';
120 $colorType = 'unknown';
123 } elseif ( $chunk_type == "acTL" ) {
124 $buf = fread( $fh, $chunk_size );
126 throw new Exception( __METHOD__
. ": Read error" );
129 $actl = unpack( "Nframes/Nplays", $buf );
130 $frameCount = $actl['frames'];
131 $loopCount = $actl['plays'];
132 } elseif ( $chunk_type == "fcTL" ) {
133 $buf = self
::read( $fh, $chunk_size );
135 throw new Exception( __METHOD__
. ": Read error" );
137 $buf = substr( $buf, 20 );
139 $fctldur = unpack( "ndelay_num/ndelay_den", $buf );
140 if ( $fctldur['delay_den'] == 0 ) {
141 $fctldur['delay_den'] = 100;
143 if ( $fctldur['delay_num'] ) {
144 $duration +
= $fctldur['delay_num'] / $fctldur['delay_den'];
146 } elseif ( $chunk_type == "iTXt" ) {
147 // Extracts iTXt chunks, uncompressing if necessary.
148 $buf = self
::read( $fh, $chunk_size );
151 '/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
154 /* $items[1] = text chunk name, $items[2] = compressed flag,
155 * $items[3] = lang code (or ""), $items[4]= compression type.
156 * $items[5] = content
159 // Theoretically should be case-sensitive, but in practice...
160 $items[1] = strtolower( $items[1] );
161 if ( !isset( self
::$text_chunks[$items[1]] ) ) {
162 // Only extract textual chunks on our list.
163 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
167 $items[3] = strtolower( $items[3] );
168 if ( $items[3] == '' ) {
169 // if no lang specified use x-default like in xmp.
170 $items[3] = 'x-default';
174 if ( $items[2] == "\x01" ) {
175 if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
176 wfSuppressWarnings();
177 $items[5] = gzuncompress( $items[5] );
180 if ( $items[5] === false ) {
181 // decompression failed
182 wfDebug( __METHOD__
. ' Error decompressing iTxt chunk - ' . $items[1] );
183 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
188 wfDebug( __METHOD__
. ' Skipping compressed png iTXt chunk due to lack of zlib,'
189 . ' or potentially invalid compression method' );
190 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
194 $finalKeyword = self
::$text_chunks[ $items[1] ];
195 $text[ $finalKeyword ][ $items[3] ] = $items[5];
196 $text[ $finalKeyword ]['_type'] = 'lang';
199 // Error reading iTXt chunk
200 throw new Exception( __METHOD__
. ": Read error on iTXt chunk" );
203 } elseif ( $chunk_type == 'tEXt' ) {
204 $buf = self
::read( $fh, $chunk_size );
206 // In case there is no \x00 which will make explode fail.
207 if ( strpos( $buf, "\x00" ) === false ) {
208 throw new Exception( __METHOD__
. ": Read error on tEXt chunk" );
211 list( $keyword, $content ) = explode( "\x00", $buf, 2 );
212 if ( $keyword === '' ||
$content === '' ) {
213 throw new Exception( __METHOD__
. ": Read error on tEXt chunk" );
216 // Theoretically should be case-sensitive, but in practice...
217 $keyword = strtolower( $keyword );
218 if ( !isset( self
::$text_chunks[ $keyword ] ) ) {
219 // Don't recognize chunk, so skip.
220 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
223 wfSuppressWarnings();
224 $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
227 if ( $content === false ) {
228 throw new Exception( __METHOD__
. ": Read error (error with iconv)" );
231 $finalKeyword = self
::$text_chunks[ $keyword ];
232 $text[ $finalKeyword ][ 'x-default' ] = $content;
233 $text[ $finalKeyword ]['_type'] = 'lang';
235 } elseif ( $chunk_type == 'zTXt' ) {
236 if ( function_exists( 'gzuncompress' ) ) {
237 $buf = self
::read( $fh, $chunk_size );
239 // In case there is no \x00 which will make explode fail.
240 if ( strpos( $buf, "\x00" ) === false ) {
241 throw new Exception( __METHOD__
. ": Read error on zTXt chunk" );
244 list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
245 if ( $keyword === '' ||
$postKeyword === '' ) {
246 throw new Exception( __METHOD__
. ": Read error on zTXt chunk" );
248 // Theoretically should be case-sensitive, but in practice...
249 $keyword = strtolower( $keyword );
251 if ( !isset( self
::$text_chunks[ $keyword ] ) ) {
252 // Don't recognize chunk, so skip.
253 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
256 $compression = substr( $postKeyword, 0, 1 );
257 $content = substr( $postKeyword, 1 );
258 if ( $compression !== "\x00" ) {
259 wfDebug( __METHOD__
. " Unrecognized compression method in zTXt ($keyword). Skipping." );
260 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
264 wfSuppressWarnings();
265 $content = gzuncompress( $content );
268 if ( $content === false ) {
269 // decompression failed
270 wfDebug( __METHOD__
. ' Error decompressing zTXt chunk - ' . $keyword );
271 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
275 wfSuppressWarnings();
276 $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
279 if ( $content === false ) {
280 throw new Exception( __METHOD__
. ": Read error (error with iconv)" );
283 $finalKeyword = self
::$text_chunks[ $keyword ];
284 $text[ $finalKeyword ][ 'x-default' ] = $content;
285 $text[ $finalKeyword ]['_type'] = 'lang';
288 wfDebug( __METHOD__
. " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
289 fseek( $fh, $chunk_size, SEEK_CUR
);
291 } elseif ( $chunk_type == 'tIME' ) {
292 // last mod timestamp.
293 if ( $chunk_size !== 7 ) {
294 throw new Exception( __METHOD__
. ": tIME wrong size" );
296 $buf = self
::read( $fh, $chunk_size );
298 throw new Exception( __METHOD__
. ": Read error" );
301 // Note: spec says this should be UTC.
302 $t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
303 $strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
304 $t['y'], $t['m'], $t['d'], $t['h'],
305 $t['min'], $t['s'] );
307 $exifTime = wfTimestamp( TS_EXIF
, $strTime );
310 $text['DateTime'] = $exifTime;
313 } elseif ( $chunk_type == 'pHYs' ) {
314 // how big pixels are (dots per meter).
315 if ( $chunk_size !== 9 ) {
316 throw new Exception( __METHOD__
. ": pHYs wrong size" );
319 $buf = self
::read( $fh, $chunk_size );
321 throw new Exception( __METHOD__
. ": Read error" );
324 $dim = unpack( "Nwidth/Nheight/Cunit", $buf );
325 if ( $dim['unit'] == 1 ) {
327 // (as opposed to 0 = undefined )
328 $text['XResolution'] = $dim['width']
330 $text['YResolution'] = $dim['height']
332 $text['ResolutionUnit'] = 3;
333 // 3 = dots per cm (from Exif).
336 } elseif ( $chunk_type == "IEND" ) {
339 fseek( $fh, $chunk_size, SEEK_CUR
);
341 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
345 if ( $loopCount > 1 ) {
346 $duration *= $loopCount;
349 if ( isset( $text['DateTimeDigitized'] ) ) {
350 // Convert date format from rfc2822 to exif.
351 foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
352 if ( $name === '_type' ) {
356 // fixme: currently timezones are ignored.
357 // possibly should be wfTimestamp's
358 // responsibility. (at least for numeric TZ)
359 $formatted = wfTimestamp( TS_EXIF
, $value );
361 // Only change if we could convert the
363 // The png standard says it should be
364 // in rfc2822 format, but not required.
365 // In general for the exif stuff we
366 // prettify the date if we can, but we
367 // display as-is if we cannot or if
369 // So do the same here.
376 'frameCount' => $frameCount,
377 'loopCount' => $loopCount,
378 'duration' => $duration,
380 'bitDepth' => $bitDepth,
381 'colorType' => $colorType,
386 * Read a chunk, checking to make sure it's not too big.
388 * @param $fh resource The file handle
389 * @param $size Integer size in bytes.
390 * @throws Exception if too big.
391 * @return String The chunk.
393 static private function read( $fh, $size ) {
394 if ( $size > self
::MAX_CHUNK_SIZE
) {
395 throw new Exception( __METHOD__
. ': Chunk size of ' . $size .
396 ' too big. Max size is: ' . self
::MAX_CHUNK_SIZE
);
398 return fread( $fh, $size );