5 * Originally written in Perl by Steve Sanbeg.
6 * Ported to PHP by Andrew Garrett
7 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License along
21 * with this program; if not, write to the Free Software Foundation, Inc.,
22 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 * http://www.gnu.org/copyleft/gpl.html
34 class GIFMetadataExtractor
{
// One-byte marker strings compared against each byte read by getMetadata().
// getMetadata() assigns all three with pack( "C", ord( ... ) ) before parsing.
// Frame separator: the "," byte.
36 private static $gifFrameSep;
// Extension introducer: the "!" byte.
39 private static $gifExtensionSep;
// Stream terminator: the ";" byte.
42 private static $gifTerm;
// Upper bound on the number of sub-blocks readBlock() accepts before it
// throws "MAX_SUBBLOCKS exceeded".
46 // Each sub-block is less than or equal to 255 bytes.
47 // Most of the time it's 255 bytes, except in XMP
48 // blocks, where it's usually between 32-127 bytes each.
49 const MAX_SUBBLOCKS
= 262144; // 5mb divided by 20.
53 * @param string $filename
// Parse the GIF file named by $filename and collect metadata about it.
//
// The header is validated, then the stream is walked one introducer byte at
// a time, dispatching on the frame separator (","), the extension
// introducer ("!") and the terminator (";"). The visible part of the result
// array carries the keys 'frameCount', 'looped', 'duration' and 'comment'.
//
// Throws Exception for a missing/unreadable file, a bad header, truncated
// input ("Ran out of input"), malformed extension blocks and unknown bytes.
//
// NOTE(review): this listing is missing statements (several guards,
// variable initialisers, skip-reads and closing braces). The comments below
// describe only what is visible - confirm against the complete file.
56 static function getMetadata( $filename ) {
// Build the single-byte markers used by the dispatch loop below.
57 self
::$gifFrameSep = pack( "C", ord( "," ) );
58 self
::$gifExtensionSep = pack( "C", ord( "!" ) );
59 self
::$gifTerm = pack( "C", ord( ";" ) );
// NOTE(review): the guard for this throw is not visible here; presumably an
// empty-$filename check - confirm.
68 throw new Exception( "No file name specified" );
// Directories are rejected with the same message as non-existent paths.
69 } elseif ( !file_exists( $filename ) ||
is_dir( $filename ) ) {
70 throw new Exception( "File $filename does not exist" );
// Open read-only in binary mode.
73 $fh = fopen( $filename, 'rb' );
// NOTE(review): guard not visible; presumably taken when fopen() failed.
76 throw new Exception( "Unable to open file $filename" );
79 // Check for the GIF header
80 $buf = fread( $fh, 6 );
// Only the two signatures 'GIF87a' and 'GIF89a' are accepted.
81 if ( !( $buf == 'GIF87a' ||
$buf == 'GIF89a' ) ) {
82 throw new Exception( "Not a valid GIF file; header: $buf" );
85 // Skip over width and height.
// One packed byte; decodeBPP() turns it into the depth value handed to
// readGCT() below.
89 $buf = fread( $fh, 1 );
90 $bpp = self
::decodeBPP( $buf );
92 // Skip over background and aspect ratio
96 self
::readGCT( $fh, $bpp );
// Main loop: read one byte per iteration and dispatch on its value.
98 while ( !feof( $fh ) ) {
99 $buf = fread( $fh, 1 );
// Frame branch: the byte equals the "," marker.
101 if ( $buf == self
::$gifFrameSep ) {
105 # # Skip bounding box
// Packed byte of the frame, decoded the same way as the header's.
109 $buf = fread( $fh, 1 );
110 $bpp = self
::decodeBPP( $buf );
// Consume the frame's colour table (if any), then skip its data block.
113 self
::readGCT( $fh, $bpp );
115 self
::skipBlock( $fh );
// Extension branch: the byte equals the "!" marker; the next byte selects
// the extension type.
116 } elseif ( $buf == self
::$gifExtensionSep ) {
117 $buf = fread( $fh, 1 );
118 if ( strlen( $buf ) < 1 ) {
119 throw new Exception( "Ran out of input" );
// 'C' unpacks one unsigned byte.
121 $extension_code = unpack( 'C', $buf )[1];
123 if ( $extension_code == 0xF9 ) {
124 // Graphics Control Extension.
125 fread( $fh, 1 ); // Block size
127 fread( $fh, 1 ); // Transparency, disposal method, user input
129 $buf = fread( $fh, 2 ); // Delay, in hundredths of seconds.
130 if ( strlen( $buf ) < 2 ) {
131 throw new Exception( "Ran out of input" );
// 'v' unpacks an unsigned 16-bit little-endian integer; the delay is
// converted from hundredths of a second to seconds and accumulated.
133 $delay = unpack( 'v', $buf )[1];
134 $duration +
= $delay * 0.01;
136 fread( $fh, 1 ); // Transparent colour index
138 $term = fread( $fh, 1 ); // Should be a terminator
139 if ( strlen( $term ) < 1 ) {
140 throw new Exception( "Ran out of input" );
142 $term = unpack( 'C', $term )[1];
// NOTE(review): guard not visible; presumably taken when $term is not 0.
144 throw new Exception( "Malformed Graphics Control Extension block" );
// 0xFE: treated as a comment block (see the error message below).
146 } elseif ( $extension_code == 0xFE ) {
148 $data = self
::readBlock( $fh );
149 if ( $data === "" ) {
150 throw new Exception( 'Read error, zero-length comment block' );
153 // The standard says this should be ASCII, however it's unclear if
154 // that's true in practice. Check to see if it's valid utf-8, if so
155 // assume it's that, otherwise assume it's windows-1252 (iso-8859-1)
// NOTE(review): the assignment of $dataCopy is not visible; presumably a
// copy of $data made just before this call - confirm.
157 // quickIsNFCVerify has the side effect of replacing any invalid characters
158 UtfNormal\Validator
::quickIsNFCVerify( $dataCopy );
// If verification altered the copy, the bytes were not valid UTF-8, so the
// original is re-decoded from windows-1252 with warnings suppressed.
160 if ( $dataCopy !== $data ) {
161 MediaWiki\
suppressWarnings();
162 $data = iconv( 'windows-1252', 'UTF-8', $data );
163 MediaWiki\restoreWarnings
();
// Compare against the most recently collected comment only.
166 $commentCount = count( $comment );
167 if ( $commentCount === 0
168 ||
$comment[$commentCount - 1] !== $data
170 // Some applications repeat the same comment on each
171 // frame of an animated GIF image, so if this comment
172 // is identical to the last, only extract once.
175 } elseif ( $extension_code == 0xFF ) {
176 // Application extension (Netscape info about the animated gif)
177 // or XMP (or theoretically any other type of extension block)
178 $blockLength = fread( $fh, 1 );
179 if ( strlen( $blockLength ) < 1 ) {
180 throw new Exception( "Ran out of input" );
182 $blockLength = unpack( 'C', $blockLength )[1];
183 $data = fread( $fh, $blockLength );
// Both identifiers compared below ('NETSCAPE2.0', 'XMP DataXMP') are 11
// bytes long. For any other length, seek back over the data plus the length
// byte so that skipBlock() starts at the length byte.
185 if ( $blockLength != 11 ) {
186 wfDebug( __METHOD__
. " GIF application block with wrong length\n" );
187 fseek( $fh, -( $blockLength +
1 ), SEEK_CUR
);
188 self
::skipBlock( $fh );
192 // NETSCAPE2.0 (application name for animated gif)
193 if ( $data == 'NETSCAPE2.0' ) {
194 $data = fread( $fh, 2 ); // Block length and introduction, should be 03 01
196 if ( $data != "\x03\x01" ) {
197 throw new Exception( "Expected \x03\x01, got $data" );
200 // Unsigned little-endian integer, loop count or zero for "forever"
201 $loopData = fread( $fh, 2 );
202 if ( strlen( $loopData ) < 2 ) {
203 throw new Exception( "Ran out of input" );
205 $loopCount = unpack( 'v', $loopData )[1];
// NOTE(review): the body of this branch is not visible; presumably it sets
// $isLooped (returned as 'looped') - confirm.
207 if ( $loopCount != 1 ) {
211 // Read out terminator byte
213 } elseif ( $data == 'XMP DataXMP' ) {
214 // application name for XMP data.
215 // see pg 18 of XMP spec part 3.
// Second argument true: readBlock() keeps the sub-block length bytes in the
// returned string.
217 $xmp = self
::readBlock( $fh, true );
// The trailer must start with "\x01\xFF\xFE" at 257 bytes from the end and
// finish with "\x03\x02\x01\x00".
219 if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
220 ||
substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
222 // this is just a sanity check.
223 throw new Exception( "XMP does not have magic trailer!" );
226 // strip out trailer.
227 $xmp = substr( $xmp, 0, -257 );
// Same rewind-and-skip as the wrong-length case above.
229 // unrecognized extension block
230 fseek( $fh, -( $blockLength +
1 ), SEEK_CUR
);
231 self
::skipBlock( $fh );
// NOTE(review): the enclosing branch is not visible; presumably the
// fallback for any other extension code - confirm.
235 self
::skipBlock( $fh );
// Terminator branch: the byte equals the ";" marker.
237 } elseif ( $buf == self
::$gifTerm ) {
// Anything else: an empty read means truncated input; otherwise report the
// unknown byte value together with the current file offset.
240 if ( strlen( $buf ) < 1 ) {
241 throw new Exception( "Ran out of input" );
243 $byte = unpack( 'C', $buf )[1];
244 throw new Exception( "At position: " . ftell( $fh ) . ", Unknown byte " . $byte );
// Entries of the result array (the surrounding return statement is not
// visible in this listing).
249 'frameCount' => $frameCount,
250 'looped' => $isLooped,
251 'duration' => $duration,
253 'comment' => $comment,
258 * @param resource $fh
/**
 * Consume a colour table from the stream.
 *
 * A table holds 2^$bpp entries of three bytes each (red, green, blue).
 * The entries are read and discarded; only the file position matters
 * to the caller.
 *
 * @param resource $fh File handle positioned at the start of the table
 * @param int $bpp Table depth as returned by decodeBPP(); 0 means that
 *  no table is present and nothing is read
 * @throws Exception If the stream ends inside the table
 */
static function readGCT( $fh, $bpp ) {
	// decodeBPP() reports 0 when there is no table; without this guard
	// pow( 2, 0 ) would make the loop swallow one bogus entry.
	if ( $bpp <= 0 ) {
		return;
	}

	$max = pow( 2, $bpp );
	for ( $i = 1; $i <= $max; ++$i ) {
		// One RGB triple per entry.
		$buf = fread( $fh, 3 );
		if ( strlen( $buf ) < 3 ) {
			throw new Exception( "Ran out of input" );
		}
	}
}
272 * @param string $data
/**
 * Decode the packed byte that precedes a colour table.
 *
 * Bits 0-2 hold the table size as (bits per pixel - 1); bit 7 is the
 * flag saying whether a colour table follows at all.
 *
 * @param string $data The packed byte, as a one-character string
 * @return int Bits per pixel (1-8) when a colour table is present,
 *  otherwise 0
 * @throws Exception If $data is empty
 */
static function decodeBPP( $data ) {
	if ( strlen( $data ) < 1 ) {
		throw new Exception( "Ran out of input" );
	}
	$buf = unpack( 'C', $data )[1];
	$bpp = ( $buf & 7 ) + 1;

	// The table-present flag is the most significant bit. Testing bit 0
	// would read part of the size field instead.
	$have_map = ( $buf >> 7 ) & 1;

	return $have_map ? $bpp : 0;
}
290 * @param resource $fh
/**
 * Skip over one data block without keeping its contents.
 *
 * A block is a run of sub-blocks, each introduced by a length byte;
 * a length byte of zero ends the block. On return the handle is
 * positioned just after that terminating byte.
 *
 * @param resource $fh File handle positioned at the first length byte
 * @throws Exception If a length byte cannot be read
 */
static function skipBlock( $fh ) {
	while ( !feof( $fh ) ) {
		$buf = fread( $fh, 1 );
		if ( strlen( $buf ) < 1 ) {
			throw new Exception( "Ran out of input" );
		}
		$block_len = unpack( 'C', $buf )[1];
		if ( $block_len == 0 ) {
			// Zero-length sub-block: the block is finished.
			return;
		}
		// Discard the sub-block payload.
		fread( $fh, $block_len );
	}
}
308 * Read a block. In the GIF format, a block is made up of
309 * several sub-blocks. Each sub block starts with one byte
310 * saying how long the sub-block is, followed by the sub-block.
311 * The entire block is terminated by a sub-block of length zero.
313 * @param resource $fh File handle
314 * @param bool $includeLengths Include the length bytes of the
315 * sub-blocks in the returned value. Normally this is false,
316 * except XMP is weird and does a hack where you need to keep
317 * these length bytes.
319 * @return string The data.
321 static function readBlock( $fh, $includeLengths = false ) {
323 $subLength = fread( $fh, 1 );
326 while ( $subLength !== "\0" ) {
328 if ( $blocks > self
::MAX_SUBBLOCKS
) {
329 throw new Exception( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" );
332 throw new Exception( "Read error: Unexpected EOF." );
334 if ( $includeLengths ) {
338 $data .= fread( $fh, ord( $subLength ) );
339 $subLength = fread( $fh, 1 );