[ZF-6295] Generic:
[zend.git] / library / Zend / Pdf / Parser.php
blobf2f3876a890f960ae156bf842d1a227faf877bc1
1 <?php
2 /**
3 * Zend Framework
5 * LICENSE
7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
15 * @package Zend_Pdf
16 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
17 * @license http://framework.zend.com/license/new-bsd New BSD License
20 /** Zend_Pdf_Element */
21 require_once 'Zend/Pdf/Element.php';
23 /** Zend_Pdf_Element_Array */
24 require_once 'Zend/Pdf/Element/Array.php';
26 /** Zend_Pdf_Element_String_Binary */
27 require_once 'Zend/Pdf/Element/String/Binary.php';
29 /** Zend_Pdf_Element_Boolean */
30 require_once 'Zend/Pdf/Element/Boolean.php';
32 /** Zend_Pdf_Element_Dictionary */
33 require_once 'Zend/Pdf/Element/Dictionary.php';
35 /** Zend_Pdf_Element_Name */
36 require_once 'Zend/Pdf/Element/Name.php';
38 /** Zend_Pdf_Element_Numeric */
39 require_once 'Zend/Pdf/Element/Numeric.php';
41 /** Zend_Pdf_Element_Object */
42 require_once 'Zend/Pdf/Element/Object.php';
44 /** Zend_Pdf_Element_Reference */
45 require_once 'Zend/Pdf/Element/Reference.php';
47 /** Zend_Pdf_Element_Object_Stream */
48 require_once 'Zend/Pdf/Element/Object/Stream.php';
50 /** Zend_Pdf_Element_String */
51 require_once 'Zend/Pdf/Element/String.php';
53 /** Zend_Pdf_Element_Null */
54 require_once 'Zend/Pdf/Element/Null.php';
56 /** Zend_Pdf_Element_Reference_Context */
57 require_once 'Zend/Pdf/Element/Reference/Context.php';
59 /** Zend_Pdf_Element_Reference_Table */
60 require_once 'Zend/Pdf/Element/Reference/Table.php';
62 /** Zend_Pdf_Trailer_Keeper */
63 require_once 'Zend/Pdf/Trailer/Keeper.php';
65 /** Zend_Pdf_ElementFactory_Interface */
66 require_once 'Zend/Pdf/ElementFactory/Interface.php';
68 /** Zend_Pdf_PhpArray */
69 require_once 'Zend/Pdf/PhpArray.php';
71 /** Zend_Pdf_StringParser */
72 require_once 'Zend/Pdf/StringParser.php';
74 /** Zend_Pdf_Parser_Stream */
75 require_once 'Zend/Pdf/Parser/Stream.php';
78 /**
79 * PDF file parser
81 * @package Zend_Pdf
82 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
83 * @license http://framework.zend.com/license/new-bsd New BSD License
85 class Zend_Pdf_Parser
87 /**
88 * String parser
90 * @var Zend_Pdf_StringParser
92 private $_stringParser;
94 /**
95 * Last PDF file trailer
97 * @var Zend_Pdf_Trailer_Keeper
99 private $_trailer;
/**
 * Report the size of the source PDF.
 *
 * The value is the strlen() of the data held by the string parser,
 * i.e. a length in bytes.
 *
 * @return integer
 */
public function getPDFLength()
{
    $pdfData = $this->_stringParser->data;

    return strlen($pdfData);
}
/**
 * Return the source PDF as a string.
 *
 * This is the data currently held by the string parser.
 *
 * @return string
 */
public function getPDFString()
{
    $pdfData = $this->_stringParser->data;

    return $pdfData;
}
/**
 * Load a cross-reference section and the trailer which belongs to it.
 *
 * The string parser is positioned at $offset and gets a fresh reference
 * table/context. Then one of two layouts is parsed:
 *  - a classic 'xref' table followed by the 'trailer' keyword and the
 *    trailer dictionary;
 *  - a cross-reference stream object. Its dictionary is validated and its
 *    entries are walked, but the branch always ends with an exception,
 *    because cross-reference streams are not supported yet.
 *
 * If the trailer dictionary contains a /Prev entry, the section it points to
 * is loaded recursively and its reference table becomes the parent of the
 * table built here.
 *
 * @param integer $offset  Offset of the cross-reference section within the PDF data
 * @throws Zend_Pdf_Exception
 * @return Zend_Pdf_Trailer_Keeper
 */
private function _loadXRefTable($offset)
{
    $parser = $this->_stringParser;
    $parser->offset = $offset;

    $refTable = new Zend_Pdf_Element_Reference_Table();
    $context  = new Zend_Pdf_Element_Reference_Context($parser, $refTable);
    $parser->setContext($context);

    $nextLexeme = $parser->readLexeme();
    if ($nextLexeme == 'xref') {
        /**
         * Common cross-reference table
         */
        $parser->skipWhiteSpace();
        while ( ($nextLexeme = $parser->readLexeme()) != 'trailer' ) {
            // Subsection header: "<first object number> <number of entries>"
            if (!ctype_digit($nextLexeme)) {
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $parser->offset-strlen($nextLexeme)));
            }
            $objNum = (int)$nextLexeme;

            $refCount = $parser->readLexeme();
            if (!ctype_digit($refCount)) {
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $parser->offset-strlen($refCount)));
            }
            // Validated as digits above; use a real integer as the loop counter.
            $refCount = (int)$refCount;

            $parser->skipWhiteSpace();
            while ($refCount > 0) {
                // Entry layout: 10-digit offset, separator, 5-digit generation
                // number, separator, one-character key, two trailing separators.
                $objectOffset = $this->_readXRefEntryNumber(10, 'Offset');
                $this->_skipXRefEntrySeparator();

                $genNumber = $this->_readXRefEntryNumber(5, 'Generation number');
                $this->_skipXRefEntrySeparator();

                // substr() is used instead of direct indexing so that a
                // truncated table doesn't trigger a string offset warning.
                $inUseKey = substr($parser->data, $parser->offset, 1);
                $parser->offset++;

                switch ($inUseKey) {
                    case 'f':
                        // free entry
                        $refTable->addReference($objNum . ' ' . $genNumber . ' R',
                                                $objectOffset,
                                                false);
                        break;

                    case 'n':
                        // in-use entry
                        $refTable->addReference($objNum . ' ' . $genNumber . ' R',
                                                $objectOffset,
                                                true);
                        break;

                    // Any other key is skipped without registering a reference.
                }

                $this->_skipXRefEntrySeparator();
                $this->_skipXRefEntrySeparator();

                $refCount--;
                $objNum++;
            }
        }

        $trailerDictOffset = $parser->offset;
        $trailerDict = $parser->readElement();
        if (!$trailerDict instanceof Zend_Pdf_Element_Dictionary) {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Dictionary expected after \'trailer\' keyword.', $trailerDictOffset));
        }
    } else {
        /**
         * Cross-reference stream
         */
        $xrefStream = $parser->getObject($offset, $context);

        if (!$xrefStream instanceof Zend_Pdf_Element_Object_Stream) {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream expected.', $offset));
        }

        $trailerDict = $xrefStream->dictionary;
        if ($trailerDict->Type->value != 'XRef') {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream object must have /Type property assigned to /XRef.', $offset));
        }
        if ($trailerDict->W === null || $trailerDict->W->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary doesn\'t have W entry or it\'s not an array.', $offset));
        }

        // Byte widths of the three fields of every stream entry
        $entryField1Size = $trailerDict->W->items[0]->value;
        $entryField2Size = $trailerDict->W->items[1]->value;
        $entryField3Size = $trailerDict->W->items[2]->value;

        if ($entryField2Size == 0 || $entryField3Size == 0) {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Wrong W dictionary entry. Only type field of stream entries has default value and could be zero length.', $offset));
        }

        $xrefStreamData = &$xrefStream->value;

        // /Index holds [first object number, entry count] pairs, one per section
        if ($trailerDict->Index !== null) {
            if ($trailerDict->Index->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary Index entry must be an array.', $offset));
            }
            $sections = count($trailerDict->Index->items)/2;
        } else {
            $sections = 1;
        }

        $streamOffset = 0;

        for ($count = 0; $count < $sections; $count++) {
            if ($trailerDict->Index !== null) {
                $objNum  = $trailerDict->Index->items[$count*2    ]->value;
                $entries = $trailerDict->Index->items[$count*2 + 1]->value;
            } else {
                $objNum  = 0;
                $entries = $trailerDict->Size->value;
            }

            for ($count2 = 0; $count2 < $entries; $count2++) {
                if ($entryField1Size == 0) {
                    // A zero-width type field defaults to type 1
                    $type = 1;
                } else if ($entryField1Size == 1) { // Optimize one-byte field case
                    $type = ord($xrefStreamData[$streamOffset++]);
                } else {
                    $type = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField1Size);
                    $streamOffset += $entryField1Size;
                }

                if ($entryField2Size == 1) { // Optimize one-byte field case
                    $field2 = ord($xrefStreamData[$streamOffset++]);
                } else {
                    $field2 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField2Size);
                    $streamOffset += $entryField2Size;
                }

                if ($entryField3Size == 1) { // Optimize one-byte field case
                    $field3 = ord($xrefStreamData[$streamOffset++]);
                } else {
                    $field3 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField3Size);
                    $streamOffset += $entryField3Size;
                }

                switch ($type) {
                    case 0:
                        // Free object
                        $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, false);
                        break;

                    case 1:
                        // In use object
                        $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, true);
                        break;

                    case 2:
                        // Object in an object stream (nothing is registered for it)
                        break;
                }

                $objNum++;
            }
        }

        // The stream has been walked, but the result is not usable yet.
        throw new Zend_Pdf_Exception('Cross-reference streams are not supported yet.');
    }

    $trailerObj = new Zend_Pdf_Trailer_Keeper($trailerDict, $context);
    if ($trailerDict->Prev instanceof Zend_Pdf_Element_Numeric ||
        $trailerDict->Prev instanceof Zend_Pdf_Element_Reference ) {
        // Load the previous section and chain its reference table as parent
        $trailerObj->setPrev($this->_loadXRefTable($trailerDict->Prev->value));
        $context->getRefTable()->setParent($trailerObj->getPrev()->getRefTable());
    }

    /**
     * We set '/Prev' dictionary property to the current cross-reference section offset.
     * It doesn't correspond to the actual data, but is true when trailer will be used
     * as a trailer for next generated PDF section.
     */
    $trailerObj->Prev = new Zend_Pdf_Element_Numeric($offset);

    return $trailerObj;
}

/**
 * Read a fixed-width, all-digit field of a cross-reference table entry.
 *
 * The parser offset is advanced past the field. Leading zeros are stripped
 * (keeping at least one digit) so that the value is never treated as octal.
 *
 * @param integer $width      Field width in bytes
 * @param string  $fieldName  Field name used in the error message
 * @throws Zend_Pdf_Exception
 * @return string
 */
private function _readXRefEntryNumber($width, $fieldName)
{
    $parser = $this->_stringParser;

    $digits = substr($parser->data, $parser->offset, $width);
    if (!ctype_digit($digits)) {
        throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. %s must contain only digits.', $parser->offset, $fieldName));
    }
    $parser->offset += $width;

    $stripped = ltrim($digits, '0');

    return ($stripped === '') ? '0' : $stripped;
}

/**
 * Consume the single white space character which separates cross-reference
 * table entry values.
 *
 * @throws Zend_Pdf_Exception
 */
private function _skipXRefEntrySeparator()
{
    $parser = $this->_stringParser;

    if ($parser->offset >= strlen($parser->data) ||
        !Zend_Pdf_StringParser::isWhiteSpace(ord( $parser->data[$parser->offset] )) ) {
        throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $parser->offset));
    }
    $parser->offset++;
}
/**
 * Return the trailer stored by the constructor (the last trailer of the
 * PDF file).
 *
 * @return Zend_Pdf_Trailer_Keeper
 */
public function getTrailer()
{
    $lastTrailer = $this->_trailer;

    return $lastTrailer;
}
/**
 * Object constructor
 *
 * Reads the PDF data (from a file if $load is true, otherwise $source itself
 * is the data), checks the '%PDF-' header and version, locates the
 * 'startxref' value in front of the trailing '%%EOF' marker and loads the
 * cross-reference section it points to.
 *
 * Note: PHP duplicates a string which is passed by value only if it's updated.
 * Thus we don't need to care about overhead.
 *
 * @param mixed $source  File name if $load is true, PDF data otherwise
 * @param Zend_Pdf_ElementFactory_Interface $factory
 * @param boolean $load
 * @throws Zend_Pdf_Exception
 */
public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory, $load)
{
    if ($load) {
        if (($pdfFile = @fopen($source, 'rb')) === false ) {
            throw new Zend_Pdf_Exception( "Can not open '$source' file for reading." );
        }

        // Read up to EOF. This doesn't depend on filesize() and handles
        // empty files as well as chunks which evaluate to false (e.g. "0").
        $data = stream_get_contents($pdfFile);
        fclose($pdfFile);

        if ($data === false) {
            throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." );
        }

        $this->_stringParser = new Zend_Pdf_StringParser($data, $factory);
    } else {
        $this->_stringParser = new Zend_Pdf_StringParser($source, $factory);
    }

    $parser = $this->_stringParser;

    $pdfVersionComment = $parser->readComment();
    if (substr($pdfVersionComment, 0, 5) != '%PDF-') {
        throw new Zend_Pdf_Exception('File is not a PDF.');
    }

    $pdfVersion = (float)substr($pdfVersionComment, 5);
    if ($pdfVersion < 0.9 || $pdfVersion >= 1.61) {
        /**
         * @todo
         * To support PDF versions 1.5 (Acrobat 6) and PDF version 1.7 (Acrobat 7)
         * Stream compression filter must be implemented (for compressed object streams).
         * Cross reference streams must be implemented
         */
        // NOTE(review): the message mentions 1.0-1.4 while the check above
        // accepts everything below 1.61 - confirm which one is intended.
        throw new Zend_Pdf_Exception(sprintf('Unsupported PDF version. Zend_Pdf supports PDF 1.0-1.4. Current version - \'%f\'', $pdfVersion));
    }

    // The '%%EOF' marker must be within the last 7 bytes of the data
    $parser->offset = strrpos($parser->data, '%%EOF');
    if ($parser->offset === false ||
        strlen($parser->data) - $parser->offset > 7) {
        throw new Zend_Pdf_Exception('Pdf file syntax error. End-of-fle marker expected at the end of file.');
    }

    $parser->offset--;
    /**
     * Go to end of cross-reference table offset
     */
    while (($parser->offset > 0) &&
           Zend_Pdf_StringParser::isWhiteSpace( ord($parser->data[$parser->offset]) )) {
        $parser->offset--;
    }
    /**
     * Go to the start of cross-reference table offset
     */
    while (($parser->offset > 0) &&
           (!Zend_Pdf_StringParser::isWhiteSpace( ord($parser->data[$parser->offset]) ))) {
        $parser->offset--;
    }
    /**
     * Go to the end of 'startxref' keyword
     */
    while (($parser->offset > 0) &&
           Zend_Pdf_StringParser::isWhiteSpace( ord($parser->data[$parser->offset]) )) {
        $parser->offset--;
    }
    /**
     * Go to the white space (eol marker) before 'startxref' keyword
     */
    $parser->offset -= 9;
    if ($parser->offset < 0) {
        // There is not enough room for the keyword before the offset value
        throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', 0));
    }

    $nextLexeme = $parser->readLexeme();
    if ($nextLexeme != 'startxref') {
        throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', $parser->offset-strlen($nextLexeme)));
    }

    $startXref = $parser->readLexeme();
    if (!ctype_digit($startXref)) {
        // Report the position of the offending value itself
        throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. Cross-reference table offset must contain only digits. Offset - 0x%X.', $parser->offset-strlen($startXref)));
    }

    $this->_trailer = $this->_loadXRefTable($startXref);
    $factory->setObjectCount($this->_trailer->Size->value);
}
/**
 * Object destructor
 *
 * Delegates to the string parser's cleanUp() method.
 */
public function __destruct()
{
    $stringParser = $this->_stringParser;
    $stringParser->cleanUp();
}