rdbms: Avoid selectDB() call in LoadMonitor new connections
[mediawiki.git] / includes / media / Exif.php
blobcd457f0beca125f17a0c689e8283983150197e84
1 <?php
2 /**
3 * Extraction and validation of image metadata.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @ingroup Media
21 * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
22 * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber
23 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
24 * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification
25 * @file
/**
 * Class to extract and validate Exif data from jpeg (and possibly tiff) files.
 * @ingroup Media
 */
class Exif {
	/** An 8-bit (1-byte) unsigned integer. */
	const BYTE = 1;

	/**
	 * An 8-bit byte containing one 7-bit ASCII code.
	 * The final byte is terminated with NULL.
	 */
	const ASCII = 2;

	/** A 16-bit (2-byte) unsigned integer. */
	const SHORT = 3;

	/** A 32-bit (4-byte) unsigned integer. */
	const LONG = 4;

	/**
	 * Two LONGs. The first LONG is the numerator and the second LONG
	 * expresses the denominator.
	 */
	const RATIONAL = 5;

	/** A 16-bit (2-byte) or 32-bit (4-byte) unsigned integer. */
	const SHORT_OR_LONG = 6;

	/** An 8-bit byte that can take any value depending on the field definition. */
	const UNDEFINED = 7;

	/** A 32-bit (4-byte) signed integer (2's complement notation). */
	const SLONG = 9;

	/**
	 * Two SLONGs. The first SLONG is the numerator and the second SLONG is
	 * the denominator.
	 */
	const SRATIONAL = 10;

	/** A fake value for things we don't want or don't support. */
	const IGNORE = -1;

	/**
	 * @var array Exif tags grouped by section. The tag name is the key; the
	 *   value is either one of the type constants above or a two-element
	 *   array [ type, expected element count ].
	 */
	private $mExifTags;

	/** @var array The raw Exif data returned by exif_read_data() */
	private $mRawExifData;

	/**
	 * @var array A filtered version of $mRawExifData that has been pruned
	 *   of invalid tags and tags that contain content they shouldn't contain
	 *   according to the Exif specification
	 */
	private $mFilteredExifData;

	/** @var string The file being processed */
	private $file;

	/** @var string The basename of the file being processed */
	private $basename;

	/** @var string|bool The private log to log to, e.g. 'exif'; false disables logging */
	private $log = false;

	/**
	 * @var string The byte order of the file ('BE' or 'LE'). Needed because
	 *   php's extension doesn't fully process some obscure props.
	 */
	private $byteOrder;

/**
 * Read the Exif data of a file, then filter and collapse it.
 *
 * @param string $file Filename.
 * @param string $byteOrder Type of byte ordering either 'BE' (Big Endian)
 *   or 'LE' (Little Endian). Default ''. Anything else triggers a warning
 *   and falls back to 'BE'.
 * @throws MWException If the exif_read_data() function is not available.
 * @todo FIXME: The following are broke:
 *   SubjectArea. Need to test the more obscure tags.
 *   DigitalZoomRatio = 0/0 is rejected. need to determine if that's valid.
 *   Possibly should treat 0/0 = 0. need to read exif spec on that.
 */
function __construct( $file, $byteOrder = '' ) {
	/**
	 * Page numbers here refer to pages in the Exif 2.2 standard
	 *
	 * Note, Exif::UNDEFINED is treated as a string, not as an array of bytes
	 * so don't put a count parameter for any UNDEFINED values.
	 *
	 * @link http://exif.org/Exif2-2.PDF The Exif 2.2 specification
	 */
	$this->mExifTags = [
		# TIFF Rev. 6.0 Attribute Information (p22)
		'IFD0' => [
			# Tags relating to image structure
			'ImageWidth' => self::SHORT_OR_LONG, # Image width
			'ImageLength' => self::SHORT_OR_LONG, # Image height
			'BitsPerSample' => [ self::SHORT, 3 ], # Number of bits per component
			# "When a primary image is JPEG compressed, this designation is not"
			# "necessary and is omitted." (p23)
			'Compression' => self::SHORT, # Compression scheme #p23
			'PhotometricInterpretation' => self::SHORT, # Pixel composition #p23
			'Orientation' => self::SHORT, # Orientation of image #p24
			'SamplesPerPixel' => self::SHORT, # Number of components
			'PlanarConfiguration' => self::SHORT, # Image data arrangement #p24
			'YCbCrSubSampling' => [ self::SHORT, 2 ], # Subsampling ratio of Y to C #p24
			'YCbCrPositioning' => self::SHORT, # Y and C positioning #p24-25
			'XResolution' => self::RATIONAL, # Image resolution in width direction
			'YResolution' => self::RATIONAL, # Image resolution in height direction
			'ResolutionUnit' => self::SHORT, # Unit of X and Y resolution #(p26)

			# Tags relating to recording offset
			'StripOffsets' => self::SHORT_OR_LONG, # Image data location
			'RowsPerStrip' => self::SHORT_OR_LONG, # Number of rows per strip
			'StripByteCounts' => self::SHORT_OR_LONG, # Bytes per compressed strip
			'JPEGInterchangeFormat' => self::SHORT_OR_LONG, # Offset to JPEG SOI
			'JPEGInterchangeFormatLength' => self::SHORT_OR_LONG, # Bytes of JPEG data

			# Tags relating to image data characteristics
			'TransferFunction' => self::IGNORE, # Transfer function
			'WhitePoint' => [ self::RATIONAL, 2 ], # White point chromaticity
			'PrimaryChromaticities' => [ self::RATIONAL, 6 ], # Chromaticities of primaries
			# Color space transformation matrix coefficients #p27
			'YCbCrCoefficients' => [ self::RATIONAL, 3 ],
			'ReferenceBlackWhite' => [ self::RATIONAL, 6 ], # Pair of black and white reference values

			# Other tags
			'DateTime' => self::ASCII, # File change date and time
			'ImageDescription' => self::ASCII, # Image title
			'Make' => self::ASCII, # Image input equipment manufacturer
			'Model' => self::ASCII, # Image input equipment model
			'Software' => self::ASCII, # Software used
			'Artist' => self::ASCII, # Person who created the image
			'Copyright' => self::ASCII, # Copyright holder
		],

		# Exif IFD Attribute Information (p30-31)
		'EXIF' => [
			# @todo NOTE: Nonexistence of this field is taken to mean nonconformance
			# to the Exif 2.1 AND 2.2 standards
			'ExifVersion' => self::UNDEFINED, # Exif version
			'FlashPixVersion' => self::UNDEFINED, # Supported Flashpix version #p32

			# Tags relating to Image Data Characteristics
			'ColorSpace' => self::SHORT, # Color space information #p32

			# Tags relating to image configuration
			'ComponentsConfiguration' => self::UNDEFINED, # Meaning of each component #p33
			'CompressedBitsPerPixel' => self::RATIONAL, # Image compression mode
			'PixelYDimension' => self::SHORT_OR_LONG, # Valid image height
			'PixelXDimension' => self::SHORT_OR_LONG, # Valid image width

			# Tags relating to related user information
			'MakerNote' => self::IGNORE, # Manufacturer notes
			'UserComment' => self::UNDEFINED, # User comments #p34

			# Tags relating to related file information
			'RelatedSoundFile' => self::ASCII, # Related audio file

			# Tags relating to date and time
			'DateTimeOriginal' => self::ASCII, # Date and time of original data generation #p36
			'DateTimeDigitized' => self::ASCII, # Date and time of digital data generation
			'SubSecTime' => self::ASCII, # DateTime subseconds
			'SubSecTimeOriginal' => self::ASCII, # DateTimeOriginal subseconds
			'SubSecTimeDigitized' => self::ASCII, # DateTimeDigitized subseconds

			# Tags relating to picture-taking conditions (p31)
			'ExposureTime' => self::RATIONAL, # Exposure time
			'FNumber' => self::RATIONAL, # F Number
			'ExposureProgram' => self::SHORT, # Exposure Program #p38
			'SpectralSensitivity' => self::ASCII, # Spectral sensitivity
			'ISOSpeedRatings' => self::SHORT, # ISO speed rating
			'OECF' => self::IGNORE,
			# Optoelectronic conversion factor. Note: We don't have support for this atm.
			'ShutterSpeedValue' => self::SRATIONAL, # Shutter speed
			'ApertureValue' => self::RATIONAL, # Aperture
			'BrightnessValue' => self::SRATIONAL, # Brightness
			'ExposureBiasValue' => self::SRATIONAL, # Exposure bias
			'MaxApertureValue' => self::RATIONAL, # Maximum lens aperture
			'SubjectDistance' => self::RATIONAL, # Subject distance
			'MeteringMode' => self::SHORT, # Metering mode #p40
			'LightSource' => self::SHORT, # Light source #p40-41
			'Flash' => self::SHORT, # Flash #p41-42
			'FocalLength' => self::RATIONAL, # Lens focal length
			'SubjectArea' => [ self::SHORT, 4 ], # Subject area
			'FlashEnergy' => self::RATIONAL, # Flash energy
			'SpatialFrequencyResponse' => self::IGNORE, # Spatial frequency response. Not supported atm.
			'FocalPlaneXResolution' => self::RATIONAL, # Focal plane X resolution
			'FocalPlaneYResolution' => self::RATIONAL, # Focal plane Y resolution
			'FocalPlaneResolutionUnit' => self::SHORT, # Focal plane resolution unit #p46
			'SubjectLocation' => [ self::SHORT, 2 ], # Subject location
			'ExposureIndex' => self::RATIONAL, # Exposure index
			'SensingMethod' => self::SHORT, # Sensing method #p46
			'FileSource' => self::UNDEFINED, # File source #p47
			'SceneType' => self::UNDEFINED, # Scene type #p47
			'CFAPattern' => self::IGNORE, # CFA pattern. not supported atm.
			'CustomRendered' => self::SHORT, # Custom image processing #p48
			'ExposureMode' => self::SHORT, # Exposure mode #p48
			'WhiteBalance' => self::SHORT, # White Balance #p49
			'DigitalZoomRatio' => self::RATIONAL, # Digital zoom ratio
			'FocalLengthIn35mmFilm' => self::SHORT, # Focal length in 35 mm film
			'SceneCaptureType' => self::SHORT, # Scene capture type #p49
			'GainControl' => self::SHORT, # Scene control #p49-50
			'Contrast' => self::SHORT, # Contrast #p50
			'Saturation' => self::SHORT, # Saturation #p50
			'Sharpness' => self::SHORT, # Sharpness #p50
			'DeviceSettingDescription' => self::IGNORE,
			# Device settings description. This could maybe be supported. Need to find an
			# example file that uses this to see if it has stuff of interest in it.
			'SubjectDistanceRange' => self::SHORT, # Subject distance range #p51

			'ImageUniqueID' => self::ASCII, # Unique image ID
		],

		# GPS Attribute Information (p52)
		'GPS' => [
			'GPSVersion' => self::UNDEFINED,
			# Should be an array of 4 Exif::BYTE's. However php treats it as an undefined
			# Note exif standard calls this GPSVersionID, but php doesn't like the id suffix
			'GPSLatitudeRef' => self::ASCII, # North or South Latitude #p52-53
			'GPSLatitude' => [ self::RATIONAL, 3 ], # Latitude
			'GPSLongitudeRef' => self::ASCII, # East or West Longitude #p53
			'GPSLongitude' => [ self::RATIONAL, 3 ], # Longitude
			'GPSAltitudeRef' => self::UNDEFINED,
			# Altitude reference. Note, the exif standard says this should be an EXIF::Byte,
			# but php seems to disagree.
			'GPSAltitude' => self::RATIONAL, # Altitude
			'GPSTimeStamp' => [ self::RATIONAL, 3 ], # GPS time (atomic clock)
			'GPSSatellites' => self::ASCII, # Satellites used for measurement
			'GPSStatus' => self::ASCII, # Receiver status #p54
			'GPSMeasureMode' => self::ASCII, # Measurement mode #p54-55
			'GPSDOP' => self::RATIONAL, # Measurement precision
			'GPSSpeedRef' => self::ASCII, # Speed unit #p55
			'GPSSpeed' => self::RATIONAL, # Speed of GPS receiver
			'GPSTrackRef' => self::ASCII, # Reference for direction of movement #p55
			'GPSTrack' => self::RATIONAL, # Direction of movement
			'GPSImgDirectionRef' => self::ASCII, # Reference for direction of image #p56
			'GPSImgDirection' => self::RATIONAL, # Direction of image
			'GPSMapDatum' => self::ASCII, # Geodetic survey data used
			'GPSDestLatitudeRef' => self::ASCII, # Reference for latitude of destination #p56
			'GPSDestLatitude' => [ self::RATIONAL, 3 ], # Latitude destination
			'GPSDestLongitudeRef' => self::ASCII, # Reference for longitude of destination #p57
			'GPSDestLongitude' => [ self::RATIONAL, 3 ], # Longitude of destination
			'GPSDestBearingRef' => self::ASCII, # Reference for bearing of destination #p57
			'GPSDestBearing' => self::RATIONAL, # Bearing of destination
			'GPSDestDistanceRef' => self::ASCII, # Reference for distance to destination #p57-58
			'GPSDestDistance' => self::RATIONAL, # Distance to destination
			'GPSProcessingMethod' => self::UNDEFINED, # Name of GPS processing method
			'GPSAreaInformation' => self::UNDEFINED, # Name of GPS area
			'GPSDateStamp' => self::ASCII, # GPS date
			'GPSDifferential' => self::SHORT, # GPS differential correction
		],
	];

	$this->file = $file;
	$this->basename = wfBaseName( $this->file );
	if ( $byteOrder === 'BE' || $byteOrder === 'LE' ) {
		$this->byteOrder = $byteOrder;
	} else {
		// Only give a warning for b/c, since originally we didn't
		// require this. The number of things affected by this is
		// rather small.
		wfWarn( 'Exif class did not have byte order specified. ' .
			'Some properties may be decoded incorrectly.' );
		$this->byteOrder = 'BE'; // BE seems about twice as popular as LE in jpg's.
	}

	// debugFile() takes ( $fname, $io ); it reads the basename from
	// $this->basename itself, so only the function name is passed here.
	$this->debugFile( __FUNCTION__, true );
	if ( function_exists( 'exif_read_data' ) ) {
		MediaWiki\suppressWarnings();
		$data = exif_read_data( $this->file, 0, true );
		MediaWiki\restoreWarnings();
	} else {
		throw new MWException( "Internal error: exif_read_data not present. " .
			"\$wgShowEXIF may be incorrectly set or not checked by an extension." );
	}

	/**
	 * exif_read_data() will return false on invalid input, such as
	 * when somebody uploads a file called something.jpeg
	 * containing random gibberish.
	 */
	$this->mRawExifData = $data ?: [];
	$this->makeFilteredData();
	$this->collapseData();
	$this->debugFile( __FUNCTION__, false );
}

/**
 * Make $this->mFilteredExifData
 *
 * Copies every tag of $this->mRawExifData whose section and name are listed
 * in $this->mExifTags and whose value passes validate(). Tags are stored
 * flat (keyed by tag name only).
 */
function makeFilteredData() {
	$this->mFilteredExifData = [];

	foreach ( $this->mRawExifData as $section => $sectionTags ) {
		if ( !array_key_exists( $section, $this->mExifTags ) ) {
			$this->debug( $section, __FUNCTION__, "'$section' is not a valid Exif section" );
			continue;
		}

		$knownTags = $this->mExifTags[$section];
		foreach ( array_keys( $sectionTags ) as $tag ) {
			if ( !array_key_exists( $tag, $knownTags ) ) {
				$this->debug( $tag, __FUNCTION__, "'$tag' is not a valid tag in '$section'" );
				continue;
			}

			$value = $sectionTags[$tag];
			if ( $this->validate( $section, $tag, $value ) ) {
				// Flat storage is ok, as the tags in the different sections do
				// not conflict, except in the computed and thumbnail sections,
				// which we don't use.
				$this->mFilteredExifData[$tag] = $value;
			} else {
				$this->debug( $value, __FUNCTION__, "'$tag' contained invalid data" );
				// Also drops any value stored earlier under the same tag name.
				unset( $this->mFilteredExifData[$tag] );
			}
		}
	}
}

/**
 * Collapse some fields together.
 * This converts some fields from exif form, to a more friendly form.
 * For example GPS latitude to a single number.
 *
 * The rationale behind this is that we're storing data, not presenting to the user
 * For example a longitude is a single number describing how far away you are from
 * the prime meridian. While it might be nice to split it up into minutes and seconds
 * for the user, it doesn't really make sense to split a single number into 4 parts
 * for storage. (degrees, minutes, second, direction vs single floating point number).
 *
 * Other things this might do (not really sure if they make sense or not):
 * Dates -> mediawiki date format.
 * convert values that can be in different units to be in one standardized unit.
 *
 * As an alternative approach, some of this could be done in the validate phase
 * if we make up our own types like Exif::DATE.
 */
function collapseData() {
	$this->exifGPStoNumber( 'GPSLatitude' );
	$this->exifGPStoNumber( 'GPSDestLatitude' );
	$this->exifGPStoNumber( 'GPSLongitude' );
	$this->exifGPStoNumber( 'GPSDestLongitude' );

	if ( isset( $this->mFilteredExifData['GPSAltitude'] )
		&& isset( $this->mFilteredExifData['GPSAltitudeRef'] )
	) {
		// We know altitude data is a <num>/<denom> from the validation
		// functions ran earlier. But multiplying such a string by -1
		// doesn't work well, so convert.
		list( $num, $denom ) = explode( '/', $this->mFilteredExifData['GPSAltitude'] );
		$this->mFilteredExifData['GPSAltitude'] = $num / $denom;

		// A reference byte of 1 flips the sign of the altitude.
		if ( $this->mFilteredExifData['GPSAltitudeRef'] === "\1" ) {
			$this->mFilteredExifData['GPSAltitude'] *= -1;
		}
		unset( $this->mFilteredExifData['GPSAltitudeRef'] );
	}

	$this->exifPropToOrd( 'FileSource' );
	$this->exifPropToOrd( 'SceneType' );

	$this->charCodeString( 'UserComment' );
	$this->charCodeString( 'GPSProcessingMethod' );
	$this->charCodeString( 'GPSAreaInformation' );

	// ComponentsConfiguration should really be an array instead of a string...
	// This turns a string of binary numbers into an array of numbers.
	if ( isset( $this->mFilteredExifData['ComponentsConfiguration'] ) ) {
		$val = $this->mFilteredExifData['ComponentsConfiguration'];
		$ccVals = [];

		$strLen = strlen( $val );
		for ( $i = 0; $i < $strLen; $i++ ) {
			$ccVals[$i] = ord( substr( $val, $i, 1 ) );
		}
		$ccVals['_type'] = 'ol'; // this is for formatting later.
		$this->mFilteredExifData['ComponentsConfiguration'] = $ccVals;
	}

	// GPSVersion(ID) is treated as the wrong type by php exif support.
	// Go through each byte turning it into a version string.
	// For example: "\x02\x02\x00\x00" -> "2.2.0.0"
	//
	// Also change exif tag name from GPSVersion (what php exif thinks it is)
	// to GPSVersionID (what the exif standard thinks it is).
	if ( isset( $this->mFilteredExifData['GPSVersion'] ) ) {
		$val = $this->mFilteredExifData['GPSVersion'];
		$components = [];

		$strLen = strlen( $val );
		for ( $i = 0; $i < $strLen; $i++ ) {
			$components[] = ord( substr( $val, $i, 1 ) );
		}

		if ( $this->byteOrder === 'LE' ) {
			// Reverse the order of the components, not of the characters of
			// the dotted string: a component >= 10 must keep its digits in
			// order ("10" must not become "01").
			$components = array_reverse( $components );
		}

		$this->mFilteredExifData['GPSVersionID'] = implode( '.', $components );
		unset( $this->mFilteredExifData['GPSVersion'] );
	}
}

/**
 * Do userComment tags and similar. See pg. 34 of exif standard.
 * Basically the first 8 bytes name the charset and the rest is the value.
 * The value is converted to UTF-8 and trimmed; the property is removed when
 * it is too short or contains only whitespace.
 * This has not been tested on any shift-JIS strings.
 *
 * @param string $prop Prop name
 */
private function charCodeString( $prop ) {
	if ( !isset( $this->mFilteredExifData[$prop] ) ) {
		return;
	}

	$raw = $this->mFilteredExifData[$prop];
	if ( strlen( $raw ) <= 8 ) {
		// invalid. Must be at least 9 bytes long.
		$this->debug( $raw, __FUNCTION__, false );
		unset( $this->mFilteredExifData[$prop] );

		return;
	}

	$charCode = substr( $raw, 0, 8 );
	$val = substr( $raw, 8 );

	if ( $charCode === "\x4A\x49\x53\x00\x00\x00\x00\x00" ) {
		// JIS
		$charset = "Shift-JIS";
	} elseif ( $charCode === "UNICODE\x00" ) {
		$charset = "UTF-16" . $this->byteOrder;
	} else {
		// ascii or undefined.
		$charset = "";
	}

	if ( $charset ) {
		MediaWiki\suppressWarnings();
		$val = iconv( $charset, 'UTF-8//IGNORE', $val );
		MediaWiki\restoreWarnings();
	} else {
		// if valid utf-8, assume that, otherwise assume windows-1252
		$checked = $val;
		UtfNormal\Validator::quickIsNFCVerify( $checked ); // validates $checked.
		if ( $checked !== $val ) {
			MediaWiki\suppressWarnings();
			$val = iconv( 'Windows-1252', 'UTF-8//IGNORE', $val );
			MediaWiki\restoreWarnings();
		}
	}

	// trim and check to make sure not only whitespace.
	$val = trim( $val );
	if ( strlen( $val ) === 0 ) {
		// only whitespace.
		$this->debug( $this->mFilteredExifData[$prop], __FUNCTION__, "$prop: Is only whitespace" );
		unset( $this->mFilteredExifData[$prop] );

		return;
	}

	// all's good.
	$this->mFilteredExifData[$prop] = $val;
}

/**
 * Convert an Exif::UNDEFINED from a raw binary string
 * to its value (the result of ord() on it). This is sometimes needed
 * depending on the type of UNDEFINED field.
 *
 * @param string $prop Name of property
 */
private function exifPropToOrd( $prop ) {
	if ( !isset( $this->mFilteredExifData[$prop] ) ) {
		return;
	}

	$raw = $this->mFilteredExifData[$prop];
	$this->mFilteredExifData[$prop] = ord( $raw );
}

/**
 * Convert gps in exif form to a single floating point number
 * for example 10 degrees 20`40`` S -> -10.34444
 *
 * The matching "<prop>Ref" entry is always removed; the property itself is
 * removed too when it or its reference is missing or the reference is not
 * one of N, S, E, W.
 *
 * @param string $prop A GPS coordinate exif tag name (like GPSLongitude)
 */
private function exifGPStoNumber( $prop ) {
	$refKey = $prop . 'Ref';
	$parts = isset( $this->mFilteredExifData[$prop] )
		? $this->mFilteredExifData[$prop]
		: null;
	$hemisphere = isset( $this->mFilteredExifData[$refKey] )
		? $this->mFilteredExifData[$refKey]
		: null;

	$decimal = false;
	if ( $parts !== null && in_array( $hemisphere, [ 'N', 'S', 'E', 'W' ], true ) ) {
		// Each part is a "<num>/<denom>" string.
		$toNumber = function ( $fraction ) {
			list( $num, $denom ) = explode( '/', $fraction );

			return $num / $denom;
		};

		$decimal = $toNumber( $parts[0] );
		$decimal += $toNumber( $parts[1] ) * ( 1 / 60 );
		$decimal += $toNumber( $parts[2] ) * ( 1 / 3600 );

		if ( $hemisphere === 'S' || $hemisphere === 'W' ) {
			$decimal *= -1; // make negative
		}
	}

	// update the exif records.
	unset( $this->mFilteredExifData[$refKey] );
	if ( $decimal === false ) {
		// invalid
		unset( $this->mFilteredExifData[$prop] );
	} else {
		// comparing against false strictly, as the result could be 0
		$this->mFilteredExifData[$prop] = $decimal;
	}
}

547 /**#@-*/
549 /**#@+
550 * @return array
/**
 * Get $this->mRawExifData, the unfiltered Exif data.
 *
 * @return array
 */
public function getData() {
	return $this->mRawExifData;
}

/**
 * Get $this->mFilteredExifData, the validated and collapsed Exif data.
 *
 * @return array
 */
public function getFilteredData() {
	return $this->mFilteredExifData;
}

568 /**#@-*/
/**
 * The version of the output format
 *
 * Before the actual metadata information is saved in the database we
 * strip some of it since we don't want to save things like thumbnails
 * which usually accompany Exif data. This value gets saved in the
 * database along with the actual Exif data, and if the version in the
 * database doesn't equal the value returned by this function the Exif
 * data is regenerated.
 *
 * @return int
 */
public static function version() {
	// Deliberately a plain literal rather than a class constant.
	return 2;
}

/**
 * Validates if a tag value is of the type it should be according to the Exif spec:
 * here, an integer-like scalar in the range 0..255.
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isByte( $in ) {
	$valid = !is_array( $in )
		&& sprintf( '%d', $in ) == $in
		&& $in >= 0
		&& $in <= 255;
	$this->debug( $in, __FUNCTION__, $valid );

	return $valid;
}

/**
 * Check that a value is a string made only of newline and printable
 * 7-bit ASCII characters, and is not blank.
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isASCII( $in ) {
	if ( is_array( $in ) ) {
		return false;
	}

	$problem = null;
	if ( preg_match( "/[^\x0a\x20-\x7e]/", $in ) ) {
		$problem = 'found a character not in our whitelist';
	} elseif ( preg_match( '/^\s*$/', $in ) ) {
		$problem = 'input consisted solely of whitespace';
	}

	if ( $problem !== null ) {
		$this->debug( $in, __FUNCTION__, $problem );

		return false;
	}

	return true;
}

/**
 * Check that a value is an integer-like scalar that fits an unsigned
 * 16-bit integer (0..65535).
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isShort( $in ) {
	// 65535 (2^16 - 1) is the largest value a 16-bit unsigned integer can hold.
	if ( !is_array( $in ) && sprintf( '%d', $in ) == $in && $in >= 0 && $in <= 65535 ) {
		$this->debug( $in, __FUNCTION__, true );

		return true;
	} else {
		$this->debug( $in, __FUNCTION__, false );

		return false;
	}
}

/**
 * Check that a value is an integer-like scalar that fits an unsigned
 * 32-bit integer (0..4294967295).
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isLong( $in ) {
	// 4294967295 (2^32 - 1) is the largest value a 32-bit unsigned integer can hold.
	if ( !is_array( $in ) && sprintf( '%d', $in ) == $in && $in >= 0 && $in <= 4294967295 ) {
		$this->debug( $in, __FUNCTION__, true );

		return true;
	} else {
		$this->debug( $in, __FUNCTION__, false );

		return false;
	}
}

/**
 * Check that a value is a "<numerator>/<denominator>" string whose two
 * parts both pass isLong(). The pattern rejects an all-zero denominator.
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isRational( $in ) {
	$parts = [];

	# The denominator alternatives avoid division by zero
	$looksLikeFraction = !is_array( $in )
		&& preg_match( '/^(\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $parts );
	if ( !$looksLikeFraction ) {
		$this->debug( $in, __FUNCTION__, 'fed a non-fraction value' );

		return false;
	}

	return $this->isLong( $parts[1] ) && $this->isLong( $parts[2] );
}

/**
 * Accept any value: UNDEFINED fields are not validated, only logged.
 *
 * @param mixed $in The input value to check
 * @return bool Always true
 */
private function isUndefined( $in ) {
	$accepted = true;
	$this->debug( $in, __FUNCTION__, $accepted );

	return $accepted;
}

/**
 * Check that a value is an integer-like scalar that fits a signed
 * 32-bit integer (-2147483648..2147483647).
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isSlong( $in ) {
	// Arrays are rejected up front (abs() cannot take one), and the bounds
	// are those of a two's complement 32-bit integer rather than +/- 2^32.
	if ( !is_array( $in )
		&& sprintf( '%d', $in ) == $in
		&& $in >= -2147483648
		&& $in <= 2147483647
	) {
		$this->debug( $in, __FUNCTION__, true );

		return true;
	} else {
		$this->debug( $in, __FUNCTION__, false );

		return false;
	}
}

/**
 * Check that a value is a "<numerator>/<denominator>" string whose two
 * parts both pass isSlong(). The numerator may be negative; the pattern
 * rejects an all-zero denominator.
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isSrational( $in ) {
	$m = [];

	# Avoid division by zero
	if ( !is_array( $in ) &&
		preg_match( '/^(-?\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $m )
	) {
		// $m[0] is the whole match; the numerator and denominator are the
		// capture groups $m[1] and $m[2].
		return $this->isSlong( $m[1] ) && $this->isSlong( $m[2] );
	} else {
		$this->debug( $in, __FUNCTION__, 'fed a non-fraction value' );

		return false;
	}
}

724 /**#@-*/
/**
 * Validates if a tag has a legal value according to the Exif spec
 *
 * The expected type comes from $this->mExifTags[$section][$tag]; when that
 * entry is [ type, count ], the value must have exactly that many elements
 * and each element is validated on its own.
 *
 * @param string $section Section where tag is located.
 * @param string $tag The tag to check.
 * @param mixed $val The value of the tag.
 * @param bool $recursive True if called recursively for array types.
 * @return bool
 */
private function validate( $section, $tag, $val, $recursive = false ) {
	$debug = "tag is '$tag'";
	$etype = $this->mExifTags[$section][$tag];
	$ecount = 1;
	if ( is_array( $etype ) ) {
		list( $etype, $ecount ) = $etype;
		if ( $recursive ) {
			$ecount = 1; // checking individual elements
		}
	}

	// count() must only be given arrays: on a scalar it warns (PHP 7.2+) or
	// throws (PHP 8). A scalar counts as one element and null as none, which
	// is what count() used to return for them.
	if ( is_array( $val ) ) {
		$count = count( $val );
	} else {
		$count = $val === null ? 0 : 1;
	}

	if ( $ecount != $count ) {
		$this->debug( $val, __FUNCTION__, "Expected $ecount elements for $tag but got $count" );

		return false;
	}
	if ( $count > 1 ) {
		foreach ( $val as $v ) {
			if ( !$this->validate( $section, $tag, $v, true ) ) {
				return false;
			}
		}

		return true;
	}

	// Does not work if not typecast
	switch ( (string)$etype ) {
		case (string)self::BYTE:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isByte( $val );
		case (string)self::ASCII:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isASCII( $val );
		case (string)self::SHORT:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isShort( $val );
		case (string)self::LONG:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isLong( $val );
		case (string)self::RATIONAL:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isRational( $val );
		case (string)self::SHORT_OR_LONG:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isShort( $val ) || $this->isLong( $val );
		case (string)self::UNDEFINED:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isUndefined( $val );
		case (string)self::SLONG:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isSlong( $val );
		case (string)self::SRATIONAL:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isSrational( $val );
		case (string)self::IGNORE:
			// Tags marked IGNORE are always rejected.
			$this->debug( $val, __FUNCTION__, $debug );

			return false;
		default:
			$this->debug( $val, __FUNCTION__, "The tag '$tag' is unknown" );

			return false;
	}
}

/**
 * Convenience function for debugging output. Does nothing unless
 * $this->log names a log.
 *
 * @param mixed $in Arrays will be processed with print_r().
 * @param string $fname Function name to log.
 * @param string|bool|null $action Default null. True logs "accepted",
 *   false logs "rejected", null logs the input only, and any other value
 *   is logged as a free-form message.
 */
private function debug( $in, $fname, $action = null ) {
	if ( !$this->log ) {
		return;
	}

	// Record the type before an array is flattened into a string.
	$type = gettype( $in );
	$class = ucfirst( __CLASS__ );
	if ( is_array( $in ) ) {
		$in = print_r( $in, true );
	}

	if ( $action === true ) {
		$message = "$class::$fname: accepted: '$in' (type: $type)";
	} elseif ( $action === false ) {
		$message = "$class::$fname: rejected: '$in' (type: $type)";
	} elseif ( $action === null ) {
		$message = "$class::$fname: input was: '$in' (type: $type)";
	} else {
		$message = "$class::$fname: $action (type: $type; content: '$in')";
	}

	wfDebugLog( $this->log, $message );
}

/**
 * Convenience function for debugging output: logs that processing of
 * $this->basename begins or ends. Does nothing unless $this->log names a log.
 *
 * @param string $fname The name of the function calling this function
 * @param bool $io Specify whether we're beginning (true) or ending (false)
 */
private function debugFile( $fname, $io ) {
	if ( !$this->log ) {
		return;
	}

	$class = ucfirst( __CLASS__ );
	$message = $io
		? "$class::$fname: begin processing: '{$this->basename}'"
		: "$class::$fname: end processing: '{$this->basename}'";

	wfDebugLog( $this->log, $message );
}