7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
17 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
18 * @license http://framework.zend.com/license/new-bsd New BSD License
19 * @version $Id: StringParser.php 17532 2009-08-10 19:04:14Z alexander $
23 /** Zend_Pdf_Element */
24 require_once 'Zend/Pdf/Element.php';
26 /** Zend_Pdf_Element_Array */
27 require_once 'Zend/Pdf/Element/Array.php';
29 /** Zend_Pdf_Element_String_Binary */
30 require_once 'Zend/Pdf/Element/String/Binary.php';
32 /** Zend_Pdf_Element_Boolean */
33 require_once 'Zend/Pdf/Element/Boolean.php';
35 /** Zend_Pdf_Element_Dictionary */
36 require_once 'Zend/Pdf/Element/Dictionary.php';
38 /** Zend_Pdf_Element_Name */
39 require_once 'Zend/Pdf/Element/Name.php';
41 /** Zend_Pdf_Element_Numeric */
42 require_once 'Zend/Pdf/Element/Numeric.php';
44 /** Zend_Pdf_Element_Object */
45 require_once 'Zend/Pdf/Element/Object.php';
47 /** Zend_Pdf_Element_Reference */
48 require_once 'Zend/Pdf/Element/Reference.php';
50 /** Zend_Pdf_Element_Object_Stream */
51 require_once 'Zend/Pdf/Element/Object/Stream.php';
53 /** Zend_Pdf_Element_String */
54 require_once 'Zend/Pdf/Element/String.php';
56 /** Zend_Pdf_Element_Null */
57 require_once 'Zend/Pdf/Element/Null.php';
59 /** Zend_Pdf_Element_Reference_Context */
60 require_once 'Zend/Pdf/Element/Reference/Context.php';
62 /** Zend_Pdf_Element_Reference_Table */
63 require_once 'Zend/Pdf/Element/Reference/Table.php';
65 /** Zend_Pdf_ElementFactory_Interface */
66 require_once 'Zend/Pdf/ElementFactory/Interface.php';
73 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
74 * @license http://framework.zend.com/license/new-bsd New BSD License
76 class Zend_Pdf_StringParser
/**
 * Source PDF data being parsed.
 *
 * Assigned by the constructor and read by every parsing method.
 *
 * @var string
 */
public $data = '';

/**
 * Current position in the data (byte offset into $data)
 *
 * @var integer
 */
public $offset = 0;

/**
 * Current reference context
 *
 * @var Zend_Pdf_Element_Reference_Context
 */
private $_context = null;

/**
 * Array of elements of the currently parsed object/trailer
 *
 * @var array
 */
private $_elements = array();

/**
 * PDF objects factory.
 *
 * @var Zend_Pdf_ElementFactory_Interface
 */
private $_objFactory = null;
/**
 * Release everything the parser is holding on to.
 *
 * Resets the reference context, the list of collected elements and the
 * object factory so that cyclic object references can be removed.
 *
 * @return void
 */
public function cleanUp()
{
    $this->_objFactory = null;
    $this->_elements   = array();
    $this->_context    = null;
}
/**
 * Tell whether the character with code $chCode is a white-space character.
 *
 * @param integer $chCode
 * @return boolean
 */
public static function isWhiteSpace($chCode)
{
    switch ($chCode) {
        case 0x00: // null character
        case 0x09: // Tab
        case 0x0A: // Line feed
        case 0x0C: // Form Feed
        case 0x0D: // Carriage return
        case 0x20: // Space
            return true;

        default:
            return false;
    }
}
/**
 * Tell whether the character with code $chCode is a delimiter character.
 *
 * @param integer $chCode
 * @return boolean
 */
public static function isDelimiter($chCode)
{
    return $chCode == 0x28  // '('
        || $chCode == 0x29  // ')'
        || $chCode == 0x3C  // '<'
        || $chCode == 0x3E  // '>'
        || $chCode == 0x5B  // '['
        || $chCode == 0x5D  // ']'
        || $chCode == 0x7B  // '{'
        || $chCode == 0x7D  // '}'
        || $chCode == 0x2F  // '/'
        || $chCode == 0x25; // '%'
}
/**
 * Advance the current offset past white space.
 *
 * When $skipComment is true, comments (introduced by '%') found between
 * white-space runs are skipped as well. Scanning stops at the first
 * character that is neither white space nor a skipped comment, or at the
 * end of the data.
 *
 * @param boolean $skipComment
 * @return void
 */
public function skipWhiteSpace($skipComment = true)
{
    while ($this->offset < strlen($this->data)) {
        $code = ord($this->data[$this->offset]);

        if (self::isWhiteSpace($code)) {
            $this->offset++;
            continue;
        }

        if ($code == 0x25 && $skipComment) { // '%'
            $this->skipComment();
            continue;
        }

        return;
    }
}
/**
 * Skip a comment.
 *
 * Advances the current offset up to (but not past) the next line feed or
 * carriage return, or to the end of the data when no end-of-line
 * character follows.
 *
 * @return void
 */
public function skipComment()
{
    $length = strlen($this->data);

    while ($this->offset < $length) {
        $code = ord($this->data[$this->offset]);

        // Stop on either end-of-line character. The previous test
        // ("!= LF || != CR") was true for every character, so the whole
        // remainder of the data was skipped.
        if ($code == 0x0A ||  // Line feed
            $code == 0x0D     // Carriage return
           ) {
            return;
        }

        $this->offset++;
    }
}
/**
 * Read a comment line.
 *
 * Skips leading white space (without skipping comments). If the next
 * character is '%', returns the text from that character up to, but not
 * including, the next line feed or carriage return, and leaves the
 * current offset on that end-of-line character (or at the end of the
 * data). Otherwise returns an empty string.
 *
 * @return string
 */
public function readComment()
{
    $this->skipWhiteSpace(false);

    // Check that there is something left and that it is a comment line.
    // The bounds test avoids reading past the end of the data when only
    // white space remained.
    if ($this->offset >= strlen($this->data) ||
        $this->data[$this->offset] != '%') {
        return '';
    }

    $start = $this->offset;

    // Length of the run that contains neither CR nor LF.
    $this->offset += strcspn($this->data, "\r\n", $start);

    return substr($this->data, $start, $this->offset - $start);
}
/**
 * Returns next lexeme from a pdf stream
 *
 * Leading white space and comments are skipped. A delimiter character is
 * returned as a lexeme of its own ('<<' and '>>' are returned as
 * two-character lexemes); any other lexeme extends up to the next
 * delimiter or white-space character. An empty string is returned when
 * the end of the data is reached.
 *
 * @return string
 */
public function readLexeme()
{
    $length = strlen($this->data);

    // Skip any mix of white space and comments preceding the lexeme.
    while (true) {
        $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

        // Bounds test first: the data may end with white space.
        if ($this->offset >= $length || $this->data[$this->offset] != '%') {
            break;
        }

        // Comment: move to the end-of-line character (or to the end of
        // the data). The end-of-line character itself is white space and
        // is consumed by the next iteration. The previous code added an
        // already absolute match position to the offset and overshot.
        $this->offset += strcspn($this->data, "\r\n", $this->offset);
    }

    if ($this->offset >= $length) {
        return '';
    }

    $start = $this->offset;
    $first = $this->data[$start];

    if (self::isDelimiter(ord($first))) {
        // '<<' and '>>' are the only two-character delimiters.
        if (($first == '<' || $first == '>') &&
            $start + 1 < $length &&
            $this->data[$start + 1] == $first) {
            $this->offset += 2;
            return $first . $first;
        }

        $this->offset++;
        return $first;
    }

    // Regular lexeme: runs until a delimiter, white space or end of data.
    while ($this->offset < $length &&
           !self::isDelimiter(ord($this->data[$this->offset])) &&
           !self::isWhiteSpace(ord($this->data[$this->offset]))) {
        $this->offset++;
    }

    return substr($this->data, $start, $this->offset - $start);
}
/**
 * Read elemental object from a PDF stream
 *
 * Dispatches on the given (or next) lexeme and builds the matching
 * element. Every element that is read is also appended to the internal
 * element list.
 *
 * Note: readElement() is a public method and could be invoked from other
 * classes. If it is not used through Zend_Pdf_StringParser::getObject(),
 * the internal element list is of no concern to the caller.
 *
 * @param string|null $nextLexeme  Already read lexeme; read from the stream when null
 * @return Zend_Pdf_Element
 * @throws Zend_Pdf_Exception
 */
public function readElement($nextLexeme = null)
{
    if ($nextLexeme === null) {
        $nextLexeme = $this->readLexeme();
    }

    switch ($nextLexeme) {
        case '(':
            $element = $this->_readString();
            break;

        case '<':
            $element = $this->_readBinaryString();
            break;

        case '/':
            $element = new Zend_Pdf_Element_Name(
                Zend_Pdf_Element_Name::unescape($this->readLexeme())
            );
            break;

        case '[':
            $element = $this->_readArray();
            break;

        case '<<':
            $element = $this->_readDictionary();
            break;

        // Closing delimiters and braces cannot start an element.
        case ')':
        case '>':
        case ']':
        case '>>':
        case '{':
        case '}':
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
                                                 $this->offset));

        default:
            if (strcasecmp($nextLexeme, 'true') == 0) {
                $element = new Zend_Pdf_Element_Boolean(true);
            } else if (strcasecmp($nextLexeme, 'false') == 0) {
                $element = new Zend_Pdf_Element_Boolean(false);
            } else if (strcasecmp($nextLexeme, 'null') == 0) {
                $element = new Zend_Pdf_Element_Null();
            } else {
                // Try an indirect reference first, then fall back to a number.
                $element = $this->_readReference($nextLexeme);
                if ($element === null) {
                    $element = $this->_readNumeric($nextLexeme);
                }
            }
            break;
    }

    $this->_elements[] = $element;

    return $element;
}
/**
 * Read string PDF object
 * Also reads trailing ')' from a pdf stream
 *
 * Expects the current offset to be just behind the opening '('.
 *
 * @return Zend_Pdf_Element_String
 * @throws Zend_Pdf_Exception
 */
private function _readString()
{
    $start  = $this->offset;
    $length = strlen($this->data);
    $depth  = 1; // the opening bracket has already been consumed

    while ($depth != 0 && $this->offset < $length) {
        $char = $this->data[$this->offset];

        if ($char == '(') {
            // opened bracket in the string, needs balanced pair
            $depth++;
        } else if ($char == ')') {
            // pair to the opened bracket
            $depth--;
        } else if ($char == '\\') {
            // escape sequence, skip next char from a check
            $this->offset++;
        }

        $this->offset++;
    }

    if ($depth != 0) {
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
    }

    // Exclude the closing ')' from the string contents.
    $raw = substr($this->data, $start, $this->offset - $start - 1);

    return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape($raw));
}
/**
 * Read binary string PDF object
 * Also reads trailing '>' from a pdf stream
 *
 * Only white space and hexadecimal digits are accepted before the
 * closing '>'.
 *
 * @return Zend_Pdf_Element_String_Binary
 * @throws Zend_Pdf_Exception
 */
private function _readBinaryString()
{
    $start  = $this->offset;
    $length = strlen($this->data);

    for (; $this->offset < $length; $this->offset++) {
        $char = $this->data[$this->offset];

        if (self::isWhiteSpace(ord($char)) || ctype_xdigit($char)) {
            continue;
        }

        if ($char != '>') {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
        }

        // Consume the closing '>' but keep it out of the string contents.
        $this->offset++;
        $raw = substr($this->data, $start, $this->offset - $start - 1);

        return new Zend_Pdf_Element_String_Binary(
            Zend_Pdf_Element_String_Binary::unescape($raw)
        );
    }

    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while binary string reading. Offset - 0x%X. \'>\' expected.', $start));
}
/**
 * Read array PDF object
 * Also reads trailing ']' from a pdf stream
 *
 * @return Zend_Pdf_Element_Array
 * @throws Zend_Pdf_Exception
 */
private function _readArray()
{
    $items = array();

    for ($lexeme = $this->readLexeme(); strlen($lexeme) != 0; $lexeme = $this->readLexeme()) {
        if ($lexeme == ']') {
            return new Zend_Pdf_Element_Array($items);
        }

        $items[] = $this->readElement($lexeme);
    }

    // An empty lexeme means the data ended before the closing bracket.
    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
}
/**
 * Read dictionary PDF object
 * Also reads trailing '>>' from a pdf stream
 *
 * Entries are read as name/value pairs until '>>' is met.
 *
 * @return Zend_Pdf_Element_Dictionary
 * @throws Zend_Pdf_Exception
 */
private function _readDictionary()
{
    $result = new Zend_Pdf_Element_Dictionary();

    for ($lexeme = $this->readLexeme(); strlen($lexeme) != 0; $lexeme = $this->readLexeme()) {
        if ($lexeme == '>>') {
            return $result;
        }

        // Remember where the key started for error reporting.
        $keyOffset = $this->offset - strlen($lexeme);

        $key   = $this->readElement($lexeme);
        $entry = $this->readElement();

        if (!$key instanceof Zend_Pdf_Element_Name) {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $keyOffset));
        }

        $result->add($key, $entry);
    }

    // An empty lexeme means the data ended before the closing '>>'.
    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
}
/**
 * Read reference PDF object
 *
 * A reference is "<object number> <generation number> R". When the
 * lexemes do not form a reference, the offset is restored to the value
 * it had on entry and null is returned.
 *
 * @param string $nextLexeme  Already read object number lexeme; read from the stream when null
 * @return Zend_Pdf_Element_Reference|null
 */
private function _readReference($nextLexeme = null)
{
    $start  = $this->offset;
    $objNum = ($nextLexeme === null) ? $this->readLexeme() : $nextLexeme;

    if (ctype_digit($objNum)) {
        $genNum = $this->readLexeme();

        if (ctype_digit($genNum) && $this->readLexeme() == 'R') {
            return new Zend_Pdf_Element_Reference(
                (int)$objNum,
                (int)$genNum,
                $this->_context,
                $this->_objFactory->resolve()
            );
        }
    }

    // it's not a reference
    $this->offset = $start;

    return null;
}
/**
 * Read numeric PDF object
 *
 * @param string $nextLexeme  Already read lexeme; read from the stream when null
 * @return Zend_Pdf_Element_Numeric
 */
private function _readNumeric($nextLexeme = null)
{
    $lexeme = ($nextLexeme === null) ? $this->readLexeme() : $nextLexeme;

    return new Zend_Pdf_Element_Numeric($lexeme);
}
/**
 * Read indirect object from a PDF stream
 *
 * Parses "<num> <gen> obj <value> endobj" or a stream object starting at
 * $offset. The parser offset in effect before the call is restored once
 * the object has been read successfully. A Zend_Pdf_Element_Null is
 * returned when $offset is null.
 *
 * @param integer $offset
 * @param Zend_Pdf_Element_Reference_Context $context
 * @return Zend_Pdf_Element_Object
 * @throws Zend_Pdf_Exception
 */
public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
{
    if ($offset === null) {
        return new Zend_Pdf_Element_Null();
    }

    // Save current offset to make getObject() reentrant
    $savedOffset = $this->offset;

    $this->offset    = $offset;
    $this->_context  = $context;
    $this->_elements = array();

    $objNum = $this->readLexeme();
    if (!ctype_digit($objNum)) {
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
    }

    $genNum = $this->readLexeme();
    if (!ctype_digit($genNum)) {
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
    }

    $objKeyword = $this->readLexeme();
    if ($objKeyword != 'obj') {
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
    }

    $objValue   = $this->readElement();
    $nextLexeme = $this->readLexeme();

    if ($nextLexeme == 'endobj') {
        /**
         * Object is not generated by factory (thus it's not marked as modified object).
         * But factory is assigned to the object.
         */
        $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());
    } else {
        /**
         * It's a stream object
         */
        if ($nextLexeme != 'stream') {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
        }

        if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
        }

        /**
         * References are automatically dereferenced at this moment.
         */
        $streamLength = $objValue->Length->value;

        /**
         * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
         * This restriction gives the possibility to recognize all cases exactly
         */
        if ($this->data[$this->offset] == "\r" &&
            $this->data[$this->offset + 1] == "\n") {
            $this->offset += 2;
        } else if ($this->data[$this->offset] == "\n") {
            $this->offset++;
        } else {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
        }

        // Remember where the stream data starts, then jump over it.
        $dataOffset    = $this->offset;
        $this->offset += $streamLength;

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endstream') {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endobj') {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $obj = new Zend_Pdf_Element_Object_Stream(
            substr($this->data, $dataOffset, $streamLength),
            (int)$objNum,
            (int)$genNum,
            $this->_objFactory->resolve(),
            $objValue
        );
    }

    // Every element collected while parsing belongs to the new object.
    foreach ($this->_elements as $element) {
        $element->setParentObject($obj);
    }

    // Restore offset value
    $this->offset = $savedOffset;

    return $obj;
}
/**
 * Get length of source string
 *
 * @return integer  Number of bytes in the source data
 */
public function getLength()
{
    $length = strlen($this->data);

    return $length;
}
/**
 * Get source string
 *
 * NOTE(review): presumably returns the data handed to the constructor
 * unchanged - confirm against the original implementation.
 *
 * @return string
 */
public function getString()
{
    return $this->data;
}
/**
 * Parse integer value from a binary stream
 *
 * Combines $size consecutive bytes of $stream, starting at $offset, into
 * one number.
 *
 * NOTE(review): bytes are combined most-significant first (base 256) -
 * confirm against the original implementation.
 *
 * @param string  $stream
 * @param integer $offset
 * @param integer $size
 * @return integer
 */
public static function parseIntFromStream($stream, $offset, $size)
{
    $value = 0;

    for ($count = 0; $count < $size; $count++) {
        $value = $value * 256 + ord($stream[$offset + $count]);
    }

    return $value;
}
/**
 * Set current context
 *
 * Replaces the reference context stored on the parser.
 *
 * @param Zend_Pdf_Element_Reference_Context $context
 * @return void
 */
public function setContext(Zend_Pdf_Element_Reference_Context $context)
{
    $this->_context = $context;
}
/**
 * Object constructor
 *
 * Note: PHP duplicates a string that is passed by value only if it is
 * updated. Thus we don't need to care about overhead.
 *
 * @param string $source  PDF data to parse
 * @param Zend_Pdf_ElementFactory_Interface $factory
 */
public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
{
    $this->_objFactory = $factory;
    $this->data        = $source;
}