[ZF-10089] Zend_Log
[zend/radio.git] / library / Zend / Pdf / FileParser.php
blob8e042b34e935b0a9fd194b30492f9b2399c71905
1 <?php
2 /**
3 * Zend Framework
5 * LICENSE
7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
15 * @category Zend
16 * @package Zend_Pdf
17 * @subpackage FileParser
18 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
20 * @version $Id$
23 /**
24 * Abstract utility class for parsing binary files.
26 * Provides a library of methods to quickly navigate and extract various data
27 * types (signed and unsigned integers, floating- and fixed-point numbers,
28 * strings, etc.) from the file.
30 * File access is managed via a {@link Zend_Pdf_FileParserDataSource} object.
31 * This allows the same parser code to work with many different data sources:
32 * in-memory objects, filesystem files, etc.
34 * @package Zend_Pdf
35 * @subpackage FileParser
36 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
37 * @license http://framework.zend.com/license/new-bsd New BSD License
39 abstract class Zend_Pdf_FileParser
41 /**** Class Constants ****/
/**
 * Little-endian byte order (0x04 0x03 0x02 0x01).
 *
 * Pass as the $byteOrder argument of the read*() parser methods to have the
 * least-significant byte interpreted first.
 */
const BYTE_ORDER_LITTLE_ENDIAN = 0;

/**
 * Big-endian byte order (0x01 0x02 0x03 0x04).
 *
 * This is the default $byteOrder of the read*() parser methods.
 */
const BYTE_ORDER_BIG_ENDIAN = 1;
55 /**** Instance Variables ****/
/**
 * Flag indicating that the file has passed a cursory validation check.
 * Reported to callers by {@link isScreened()}.
 * @var boolean
 */
protected $_isScreened = false;

/**
 * Flag indicating that the file has been successfully parsed.
 * Reported to callers by {@link isParsed()}.
 * @var boolean
 */
protected $_isParsed = false;

/**
 * Object representing the data source to be parsed. Assigned by the
 * constructor and reset to null by the destructor.
 * @var Zend_Pdf_FileParserDataSource
 */
protected $_dataSource = null;
78 /**** Public Interface ****/
81 /* Abstract Methods */
/**
 * Performs a cursory check to verify that the binary file is in the expected
 * format. Intended to quickly weed out obviously bogus files.
 *
 * Implementations must set $this->_isScreened to true if successful; that
 * flag is what {@link isScreened()} reports.
 *
 * @throws Zend_Pdf_Exception
 */
abstract public function screen();
/**
 * Reads and parses the complete binary file.
 *
 * Implementations must set $this->_isParsed to true if successful; that
 * flag is what {@link isParsed()} reports.
 *
 * @throws Zend_Pdf_Exception
 */
abstract public function parse();
103 /* Object Lifecycle */
/**
 * Object constructor.
 *
 * Accepts the data source only when it reports a non-zero size. A source
 * whose size is zero is treated as not properly initialized and is rejected
 * before it is stored on the parser.
 *
 * @param Zend_Pdf_FileParserDataSource $dataSource
 * @throws Zend_Pdf_Exception
 */
public function __construct(Zend_Pdf_FileParserDataSource $dataSource)
{
    $sourceSize = $dataSource->getSize();
    if ($sourceSize == 0) {
        require_once 'Zend/Pdf/Exception.php';
        $message = 'The data source has not been properly initialized';
        throw new Zend_Pdf_Exception($message, Zend_Pdf_Exception::BAD_DATA_SOURCE);
    }

    $this->_dataSource = $dataSource;
}
/**
 * Object destructor.
 *
 * Discards the data source object by dropping this parser's reference to it.
 */
public function __destruct()
{
    $this->_dataSource = null;
}
134 /* Accessors */
/**
 * Returns true if the file has passed a cursory validation check.
 *
 * Simply reports the $_isScreened flag, which starts out false and is
 * expected to be set by the concrete {@link screen()} implementation.
 *
 * @return boolean
 */
public function isScreened()
{
    return $this->_isScreened;
}
/**
 * Returns true if the file has been successfully parsed.
 *
 * Simply reports the $_isParsed flag, which starts out false and is
 * expected to be set by the concrete {@link parse()} implementation.
 *
 * @return boolean
 */
public function isParsed()
{
    return $this->_isParsed;
}
/**
 * Returns the data source object representing the file being parsed.
 *
 * This is the same object that was handed to the constructor.
 *
 * @return Zend_Pdf_FileParserDataSource
 */
public function getDataSource()
{
    return $this->_dataSource;
}
167 /* Primitive Methods */
/**
 * Convenience wrapper for the data source object's moveToOffset() method.
 *
 * The call is forwarded unchanged; nothing is returned.
 *
 * @param integer $offset Destination byte offset.
 * @throws Zend_Pdf_Exception
 */
public function moveToOffset($offset)
{
    $this->_dataSource->moveToOffset($offset);
}
/**
 * Convenience wrapper for the data source object's getOffset() method.
 *
 * @return integer Value reported by the data source (presumably the current
 *   byte offset within the data; confirm against the data source class).
 */
public function getOffset() {
    return $this->_dataSource->getOffset();
}
/**
 * Convenience wrapper for the data source object's getSize() method.
 *
 * @return integer Value reported by the data source (presumably the total
 *   size of the data in bytes; confirm against the data source class).
 */
public function getSize() {
    return $this->_dataSource->getSize();
}
/**
 * Convenience wrapper for the data source object's readBytes() method.
 *
 * The data source's return value is passed back to the caller untouched.
 *
 * @param integer $byteCount Number of bytes to read.
 * @return string
 * @throws Zend_Pdf_Exception
 */
public function readBytes($byteCount)
{
    return $this->_dataSource->readBytes($byteCount);
}
/**
 * Convenience wrapper for the data source object's skipBytes() method.
 *
 * The call is forwarded unchanged; nothing is returned.
 *
 * @param integer $byteCount Number of bytes to skip.
 * @throws Zend_Pdf_Exception
 */
public function skipBytes($byteCount)
{
    $this->_dataSource->skipBytes($byteCount);
}
212 /* Parser Methods */
/**
 * Reads a signed (two's complement) integer of 1 to 4 bytes from the binary
 * file at the current byte offset.
 *
 * Advances the offset by the number of bytes read. Throws an exception if
 * the size or byte order is invalid. Note that the bytes are consumed from
 * the data source before the byte order is validated.
 *
 * @param integer $size Size of integer in bytes: 1-4
 * @param integer $byteOrder (optional) Big- or little-endian byte order.
 *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
 *   If omitted, uses big-endian.
 * @return integer
 * @throws Zend_Pdf_Exception
 */
public function readInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
{
    if (($size < 1) || ($size > 4)) {
        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception("Invalid signed integer size: $size",
                                     Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
    }
    $bytes = $this->_dataSource->readBytes($size);

    /* unpack() is not usable here: it decodes signed integers in the host
     * byte order only and has no notion of 3-byte integers. Instead,
     * normalize the data to most-significant-byte-first order so a single
     * loop can serve both byte orders.
     */
    if ($byteOrder != Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
        if ($byteOrder != Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
                                         Zend_Pdf_Exception::INVALID_BYTE_ORDER);
        }
        $bytes = strrev($bytes);
    }

    /* The top bit of the most significant byte is the sign bit.
     */
    $isNegative = ((ord($bytes[0]) & 0x80) == 0x80);

    /* For a negative value, accumulate the bitwise complement of every byte
     * (always a small non-negative quantity) and complement the finished
     * result once at the end. No intermediate value ever reaches the sign
     * bit of a native integer, so this is reliable on both 32- and 64-bit
     * systems.
     */
    $number = 0;
    for ($index = 0; $index < $size; $index++) {
        $byte = ord($bytes[$index]);
        if ($isNegative) {
            $byte = (~ $byte) & 0xff;
        }
        $number = ($number << 8) | $byte;
    }

    if ($isNegative) {
        return ~$number;
    }
    return $number;
}
/**
 * Reads an unsigned integer of 1 to 4 bytes from the binary file at the
 * current byte offset.
 *
 * Advances the offset by the number of bytes read. Throws an exception if
 * the size or byte order is invalid. Note that the bytes are consumed from
 * the data source before the byte order is validated.
 *
 * NOTE: If you ask for a 4-byte unsigned integer on a 32-bit machine, the
 * resulting value WILL BE SIGNED because PHP uses signed integers internally
 * for everything. To guarantee portability, be sure to use bitwise
 * operators on large unsigned integers!
 *
 * @param integer $size Size of integer in bytes: 1-4
 * @param integer $byteOrder (optional) Big- or little-endian byte order.
 *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
 *   If omitted, uses big-endian.
 * @return integer
 * @throws Zend_Pdf_Exception
 */
public function readUInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
{
    if (($size < 1) || ($size > 4)) {
        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception("Invalid unsigned integer size: $size",
                                     Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
    }
    $bytes = $this->_dataSource->readBytes($size);

    /* Normalize to most-significant-byte-first order so that one
     * shift-and-or loop handles both byte orders. Working the bytes
     * directly is lighter than going through unpack().
     */
    if ($byteOrder != Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
        if ($byteOrder != Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
                                         Zend_Pdf_Exception::INVALID_BYTE_ORDER);
        }
        $bytes = strrev($bytes);
    }

    $number = 0;
    for ($index = 0; $index < $size; $index++) {
        $number = ($number << 8) | ord($bytes[$index]);
    }
    return $number;
}
/**
 * Returns true if the specified bit is set in the integer bitfield.
 *
 * The check is "masked value is non-zero" rather than "masked value equals
 * the mask". For any valid bit number the two are equivalent, but when $bit
 * is at or beyond the platform's integer width PHP 7+ evaluates the shift
 * to 0; comparing against that zero mask would wrongly report the bit as
 * set for every bitfield. Such out-of-range bits are now reported as unset.
 *
 * @param integer $bit Bit number to test (i.e. - 0-31)
 * @param integer $bitField
 * @return boolean
 */
public function isBitSet($bit, $bitField)
{
    $bitMask = 1 << $bit;
    return (($bitField & $bitMask) != 0);
}
/**
 * Reads a signed fixed-point number from the binary file at the current
 * byte offset.
 *
 * The value is read as a signed integer of ($mantissaBits + $fractionBits)
 * bits and then divided by 2^$fractionBits. Common fixed-point sizes are
 * 2.14 and 16.16.
 *
 * Advances the offset by the number of bytes read. Throws an exception if
 * the total bit count is not a whole number of bytes, or if the underlying
 * integer read fails.
 *
 * @param integer $mantissaBits Number of bits in the mantissa
 * @param integer $fractionBits Number of bits in the fraction
 * @param integer $byteOrder (optional) Big- or little-endian byte order.
 *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
 *   If omitted, uses big-endian.
 * @return float
 * @throws Zend_Pdf_Exception
 */
public function readFixed($mantissaBits, $fractionBits,
                          $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
{
    $totalBits = $mantissaBits + $fractionBits;
    if (($totalBits % 8) !== 0) {
        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception('Fixed-point numbers are whole bytes',
                                     Zend_Pdf_Exception::BAD_FIXED_POINT_SIZE);
    }

    /* Eight bits per byte; readInt() enforces the 1-4 byte limit itself.
     */
    $byteCount = $totalBits >> 3;
    $rawValue  = $this->readInt($byteCount, $byteOrder);

    /* Scaling by 2^fractionBits moves the binary point into place.
     */
    $scale = 1 << $fractionBits;
    return $rawValue / $scale;
}
/**
 * Reads a Unicode UTF-16-encoded string from the binary file at the current
 * byte offset and converts it to the requested character set.
 *
 * The byte order of the UTF-16 data selects the source encoding (UTF-16BE
 * or UTF-16LE). When the requested character set is that same encoding the
 * raw bytes are returned as-is; otherwise they are passed through iconv().
 * A byte count of zero returns an empty string without touching the data
 * source.
 *
 * Advances the offset by the number of bytes read. Throws an exception if
 * the byte order is invalid; note that the bytes have already been consumed
 * from the data source at that point.
 *
 * @todo Consider changing $byteCount to a character count. They are not
 *   always equivalent (in the case of surrogates).
 * @todo Make $byteOrder optional if there is a byte-order mark (BOM) in the
 *   string being extracted.
 *
 * @param integer $byteCount Number of bytes (characters * 2) to return.
 * @param integer $byteOrder (optional) Big- or little-endian byte order.
 *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
 *   If omitted, uses big-endian.
 * @param string $characterSet (optional) Desired resulting character set.
 *   You may use any character set supported by {@link iconv()}. If omitted,
 *   uses 'current locale'.
 * @return string
 * @throws Zend_Pdf_Exception
 */
public function readStringUTF16($byteCount,
                                $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN,
                                $characterSet = '')
{
    if ($byteCount == 0) {
        return '';
    }
    $bytes = $this->_dataSource->readBytes($byteCount);

    /* Work out which flavor of UTF-16 the file data is in.
     */
    if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
        $sourceEncoding = 'UTF-16BE';
    } else if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN) {
        $sourceEncoding = 'UTF-16LE';
    } else {
        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
                                     Zend_Pdf_Exception::INVALID_BYTE_ORDER);
    }

    /* Nothing to convert when the caller wants the encoding we already have.
     */
    if ($characterSet == $sourceEncoding) {
        return $bytes;
    }
    return iconv($sourceEncoding, $characterSet, $bytes);
}
/**
 * Reads a Mac Roman-encoded string from the binary file at the current byte
 * offset and converts it to the requested character set.
 *
 * When the requested character set is 'MacRoman' the raw bytes are returned
 * as-is; otherwise they are passed through iconv(). A byte count of zero
 * returns an empty string without touching the data source.
 *
 * Advances the offset by the number of bytes read. Throws an exception if
 * the read fails.
 *
 * @param integer $byteCount Number of bytes (characters) to return.
 * @param string $characterSet (optional) Desired resulting character set.
 *   You may use any character set supported by {@link iconv()}. If omitted,
 *   uses 'current locale'.
 * @return string
 * @throws Zend_Pdf_Exception
 */
public function readStringMacRoman($byteCount, $characterSet = '')
{
    if ($byteCount == 0) {
        return '';
    }

    $bytes = $this->_dataSource->readBytes($byteCount);

    /* Only convert when the caller asked for something other than the
     * encoding the data is already in.
     */
    return ($characterSet == 'MacRoman')
         ? $bytes
         : iconv('MacRoman', $characterSet, $bytes);
}
/**
 * Reads a Pascal string from the binary file at the current byte offset and
 * converts it to the requested character set.
 *
 * The length of the Pascal string is determined by reading the length bytes
 * which precede the character data, as a big-endian unsigned integer. A
 * length of zero returns an empty string. The character data is treated as
 * ASCII: when the requested character set is 'ASCII' the raw bytes are
 * returned as-is; otherwise they are passed through iconv().
 *
 * Advances the offset by the number of bytes read. Throws an exception if
 * an error occurs.
 *
 * @param string $characterSet (optional) Desired resulting character set.
 *   You may use any character set supported by {@link iconv()}. If omitted,
 *   uses 'current locale'.
 * @param integer $lengthBytes (optional) Number of bytes that make up the
 *   length. Default is 1.
 * @return string
 * @throws Zend_Pdf_Exception
 */
public function readStringPascal($characterSet = '', $lengthBytes = 1)
{
    /* The length prefix comes first; readUInt() validates its size.
     */
    $stringLength = $this->readUInt($lengthBytes);
    if ($stringLength == 0) {
        return '';
    }

    $bytes = $this->_dataSource->readBytes($stringLength);

    return ($characterSet == 'ASCII')
         ? $bytes
         : iconv('ASCII', $characterSet, $bytes);
}