*prechod na novsiu verziu ZF
[sport-group.git] / library / Zend / Pdf / StringParser.php
blob7e803a422f283aa6f40e1d5f303731101a919681
1 <?php
2 /**
3 * Zend Framework
5 * LICENSE
7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
15 * @category Zend
16 * @package Zend_Pdf
17 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
18 * @license http://framework.zend.com/license/new-bsd New BSD License
19 * @version $Id: StringParser.php 17532 2009-08-10 19:04:14Z alexander $
23 /** Zend_Pdf_Element */
24 require_once 'Zend/Pdf/Element.php';
26 /** Zend_Pdf_Element_Array */
27 require_once 'Zend/Pdf/Element/Array.php';
29 /** Zend_Pdf_Element_String_Binary */
30 require_once 'Zend/Pdf/Element/String/Binary.php';
32 /** Zend_Pdf_Element_Boolean */
33 require_once 'Zend/Pdf/Element/Boolean.php';
35 /** Zend_Pdf_Element_Dictionary */
36 require_once 'Zend/Pdf/Element/Dictionary.php';
38 /** Zend_Pdf_Element_Name */
39 require_once 'Zend/Pdf/Element/Name.php';
41 /** Zend_Pdf_Element_Numeric */
42 require_once 'Zend/Pdf/Element/Numeric.php';
44 /** Zend_Pdf_Element_Object */
45 require_once 'Zend/Pdf/Element/Object.php';
47 /** Zend_Pdf_Element_Reference */
48 require_once 'Zend/Pdf/Element/Reference.php';
50 /** Zend_Pdf_Element_Object_Stream */
51 require_once 'Zend/Pdf/Element/Object/Stream.php';
53 /** Zend_Pdf_Element_String */
54 require_once 'Zend/Pdf/Element/String.php';
56 /** Zend_Pdf_Element_Null */
57 require_once 'Zend/Pdf/Element/Null.php';
59 /** Zend_Pdf_Element_Reference_Context */
60 require_once 'Zend/Pdf/Element/Reference/Context.php';
62 /** Zend_Pdf_Element_Reference_Table */
63 require_once 'Zend/Pdf/Element/Reference/Table.php';
65 /** Zend_Pdf_ElementFactory_Interface */
66 require_once 'Zend/Pdf/ElementFactory/Interface.php';
/**
 * PDF string parser.
 *
 * Tokenizes a PDF document held in memory as a string and builds
 * Zend_Pdf_Element objects (strings, names, arrays, dictionaries, numerics,
 * references, indirect objects and streams) from it.
 *
 * @package    Zend_Pdf
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Pdf_StringParser
{
    /**
     * Source PDF
     *
     * @var string
     */
    public $data = '';

    /**
     * Current position in a data
     *
     * @var integer
     */
    public $offset = 0;

    /**
     * Current reference context
     *
     * @var Zend_Pdf_Element_Reference_Context
     */
    private $_context = null;

    /**
     * Array of elements of the currently parsed object/trailer
     *
     * @var array
     */
    private $_elements = array();

    /**
     * PDF objects factory.
     *
     * @var Zend_Pdf_ElementFactory_Interface
     */
    private $_objFactory = null;


    /**
     * Clean up resources.
     *
     * Clear current state to remove cyclic object references
     */
    public function cleanUp()
    {
        $this->_context    = null;
        $this->_elements   = array();
        $this->_objFactory = null;
    }

    /**
     * Character with code $chCode is white space
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isWhiteSpace($chCode)
    {
        return $chCode == 0x00    // null character
            || $chCode == 0x09    // Tab
            || $chCode == 0x0A    // Line feed
            || $chCode == 0x0C    // Form Feed
            || $chCode == 0x0D    // Carriage return
            || $chCode == 0x20;   // Space
    }

    /**
     * Character with code $chCode is a delimiter character
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isDelimiter($chCode)
    {
        return $chCode == 0x28    // '('
            || $chCode == 0x29    // ')'
            || $chCode == 0x3C    // '<'
            || $chCode == 0x3E    // '>'
            || $chCode == 0x5B    // '['
            || $chCode == 0x5D    // ']'
            || $chCode == 0x7B    // '{'
            || $chCode == 0x7D    // '}'
            || $chCode == 0x2F    // '/'
            || $chCode == 0x25;   // '%'
    }

    /**
     * Skip white space
     *
     * Advances the current offset past white space characters and, when
     * $skipComment is true, past '%' comments as well.
     *
     * @param boolean $skipComment
     */
    public function skipWhiteSpace($skipComment = true)
    {
        $length = strlen($this->data);

        while ($this->offset < $length) {
            $chCode = ord($this->data[$this->offset]);

            if (self::isWhiteSpace($chCode)) {
                $this->offset++;
            } else if ($skipComment && $chCode == 0x25) { // '%'
                // skipComment() stops on the end-of-line character, which is
                // then consumed as white space by the next iteration.
                $this->skipComment();
            } else {
                return;
            }
        }
    }

    /**
     * Skip comment
     *
     * Advances the current offset up to (but not past) the next line feed or
     * carriage return, or to the end of the data if there is none.
     */
    public function skipComment()
    {
        // The previous implementation tested "!= LF || != CR", which is
        // always true, so it never stopped at the end of the line.
        if ($this->offset < strlen($this->data)) {
            $this->offset += strcspn($this->data, "\r\n", $this->offset);
        }
    }

    /**
     * Read comment line
     *
     * Skips leading white space. If the next character starts a comment, the
     * comment text (including the leading '%', excluding the end-of-line
     * character) is returned and the offset is left on the end-of-line
     * character.
     *
     * @return string Comment text or an empty string if there is no comment
     */
    public function readComment()
    {
        $this->skipWhiteSpace(false);

        /** Check if it's a comment line (and that we are not at the end of data) */
        if ($this->offset >= strlen($this->data) || $this->data[$this->offset] != '%') {
            return '';
        }

        $start = $this->offset;
        $this->offset += strcspn($this->data, "\r\n", $start);

        return substr($this->data, $start, $this->offset - $start);
    }

    /**
     * Returns next lexeme from a pdf stream
     *
     * White space and comments preceding the lexeme are skipped. A lexeme is
     * either a delimiter ('<<' and '>>' are returned as a single lexeme) or a
     * run of regular (non-delimiter, non-white-space) characters.
     *
     * @return string Lexeme or an empty string if the end of data is reached
     */
    public function readLexeme()
    {
        $length = strlen($this->data);

        // Skip white space and comments. The bound check keeps strspn() from
        // being called with an offset that lies beyond the end of the data.
        while ($this->offset < $length) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

            if ($this->offset >= $length || $this->data[$this->offset] != '%') {
                break;
            }

            // Comment: move to the end-of-line character (or to the end of
            // data); the EOL itself is consumed as white space on the next pass.
            $this->offset += strcspn($this->data, "\r\n", $this->offset);
        }

        if ($this->offset >= $length) {
            return '';
        }

        $start = $this->offset;
        $char  = $this->data[$start];

        if (self::isDelimiter(ord($char))) {
            // '<<' and '>>' are two-character delimiters
            if (($char === '<' || $char === '>')
                && $start + 1 < $length
                && $this->data[$start + 1] === $char
            ) {
                $this->offset += 2;
                return $char . $char;
            }

            $this->offset++;
            return $char;
        }

        // Regular token: everything up to the next delimiter or white space
        // (same character sets as isDelimiter() and isWhiteSpace()).
        $this->offset += strcspn($this->data, "()<>[]{}/%\x00\t\n\f\r ", $start);

        return substr($this->data, $start, $this->offset - $start);
    }

    /**
     * Read elemental object from a PDF stream
     *
     * Note: readElement() method is a public method and could be invoked from other classes.
     * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
     * about _elements member management.
     *
     * @param string|null $nextLexeme Already read first lexeme of the element,
     *                                or null to read it from the stream
     * @return Zend_Pdf_Element
     * @throws Zend_Pdf_Exception
     */
    public function readElement($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        switch ($nextLexeme) {
            case '(':
                return ($this->_elements[] = $this->_readString());

            case '<':
                return ($this->_elements[] = $this->_readBinaryString());

            case '/':
                return ($this->_elements[] = new Zend_Pdf_Element_Name(
                                                Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
                                                                      ));

            case '[':
                return ($this->_elements[] = $this->_readArray());

            case '<<':
                return ($this->_elements[] = $this->_readDictionary());

            case ')':
                // fall through to next case
            case '>':
                // fall through to next case
            case ']':
                // fall through to next case
            case '>>':
                // fall through to next case
            case '{':
                // fall through to next case
            case '}':
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
                                                $this->offset));

            default:
                if (strcasecmp($nextLexeme, 'true') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
                } else if (strcasecmp($nextLexeme, 'false') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
                } else if (strcasecmp($nextLexeme, 'null') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Null());
                }

                // "<num> <num> R" is a reference; anything else is treated as a numeric
                $ref = $this->_readReference($nextLexeme);
                if ($ref !== null) {
                    return ($this->_elements[] = $ref);
                }

                return ($this->_elements[] = $this->_readNumeric($nextLexeme));
        }
    }

    /**
     * Read string PDF object
     * Also reads trailing ')' from a pdf stream
     *
     * The opening '(' must already be consumed. Nested brackets have to be
     * balanced; a backslash escapes the following character.
     *
     * @return Zend_Pdf_Element_String
     * @throws Zend_Pdf_Exception
     */
    private function _readString()
    {
        $start          = $this->offset;
        $length         = strlen($this->data);
        $openedBrackets = 1;

        while ($openedBrackets != 0 && $this->offset < $length) {
            switch (ord($this->data[$this->offset])) {
                case 0x28: // '(' - opened bracket in the string, needs balanced pair.
                    $openedBrackets++;
                    break;

                case 0x29: // ')' - pair to the opened bracket
                    $openedBrackets--;
                    break;

                case 0x5C: // '\\' - escape sequence, skip next char from a check
                    $this->offset++;
                    break;
            }

            $this->offset++;
        }

        if ($openedBrackets != 0) {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
        }

        // "- 1" drops the closing ')' from the string body
        return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
                                                                                     $start,
                                                                                     $this->offset - $start - 1) ));
    }

    /**
     * Read binary string PDF object
     * Also reads trailing '>' from a pdf stream
     *
     * The opening '<' must already be consumed. Only hexadecimal digits and
     * white space are accepted before the closing '>'.
     *
     * @return Zend_Pdf_Element_String_Binary
     * @throws Zend_Pdf_Exception
     */
    private function _readBinaryString()
    {
        $start  = $this->offset;
        $length = strlen($this->data);

        while ($this->offset < $length) {
            $char = $this->data[$this->offset];

            if (self::isWhiteSpace(ord($char)) || ctype_xdigit($char)) {
                $this->offset++;
            } else if ($char == '>') {
                $this->offset++;
                // "- 1" drops the closing '>' from the string body
                return new Zend_Pdf_Element_String_Binary(
                               Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
                                                                                $start,
                                                                                $this->offset - $start - 1) ));
            } else {
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
            }
        }

        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while binary string reading. Offset - 0x%X. \'>\' expected.', $start));
    }

    /**
     * Read array PDF object
     * Also reads trailing ']' from a pdf stream
     *
     * @return Zend_Pdf_Element_Array
     * @throws Zend_Pdf_Exception
     */
    private function _readArray()
    {
        $elements = array();

        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme == ']') {
                return new Zend_Pdf_Element_Array($elements);
            }

            $elements[] = $this->readElement($nextLexeme);
        }

        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
    }

    /**
     * Read dictionary PDF object
     * Also reads trailing '>>' from a pdf stream
     *
     * @return Zend_Pdf_Element_Dictionary
     * @throws Zend_Pdf_Exception
     */
    private function _readDictionary()
    {
        $dictionary = new Zend_Pdf_Element_Dictionary();

        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme == '>>') {
                return $dictionary;
            }

            // Remember where the key started for the error message
            $nameStart = $this->offset - strlen($nextLexeme);

            $name  = $this->readElement($nextLexeme);
            $value = $this->readElement();

            if (!$name instanceof Zend_Pdf_Element_Name) {
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
            }

            $dictionary->add($name, $value);
        }

        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
    }

    /**
     * Read reference PDF object
     *
     * Tries to read an "<object number> <generation number> R" sequence. If
     * the sequence does not match, the offset is rewound to the value it had
     * on entry and null is returned.
     *
     * @param string|null $nextLexeme Already read object number lexeme, or
     *                                null to read it from the stream
     * @return Zend_Pdf_Element_Reference|null
     */
    private function _readReference($nextLexeme = null)
    {
        $start = $this->offset;

        if ($nextLexeme === null) {
            $objNum = $this->readLexeme();
        } else {
            $objNum = $nextLexeme;
        }
        if (!ctype_digit($objNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $rMark = $this->readLexeme();
        if ($rMark != 'R') { // it's not a reference
            $this->offset = $start;
            return null;
        }

        return new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());
    }

    /**
     * Read numeric PDF object
     *
     * @param string|null $nextLexeme Already read lexeme, or null to read it
     *                                from the stream
     * @return Zend_Pdf_Element_Numeric
     */
    private function _readNumeric($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        return new Zend_Pdf_Element_Numeric($nextLexeme);
    }

    /**
     * Read indirect object from a PDF stream
     *
     * The parser position and the list of collected elements are saved on
     * entry and restored on exit (also when a syntax error is thrown), so the
     * method can be re-entered while another object is being parsed.
     * NOTE(review): a nested call presumably happens when a reference (e.g. a
     * stream's Length) is dereferenced during parsing - confirm against
     * Zend_Pdf_Element_Reference.
     *
     * @param integer|null $offset Offset of the object in the data; null
     *                             produces a Zend_Pdf_Element_Null
     * @param Zend_Pdf_Element_Reference_Context $context
     * @return Zend_Pdf_Element_Object
     * @throws Zend_Pdf_Exception
     */
    public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
    {
        if ($offset === null ) {
            return new Zend_Pdf_Element_Null();
        }

        // Save current state to make getObject() reentrant
        $offsetSave   = $this->offset;
        $elementsSave = $this->_elements;

        $this->offset    = $offset;
        $this->_context  = $context;
        $this->_elements = array();

        try {
            $obj = $this->_parseIndirectObject();
        } catch (Exception $e) {
            // Do not leave the parser in the middle of a broken object
            $this->offset    = $offsetSave;
            $this->_elements = $elementsSave;
            throw $e;
        }

        // Restore saved state
        $this->offset    = $offsetSave;
        $this->_elements = $elementsSave;

        return $obj;
    }

    /**
     * Parse "<num> <gen> obj ... endobj" (optionally with a stream) starting
     * at the current offset and attach all collected elements to the result.
     *
     * @return Zend_Pdf_Element_Object
     * @throws Zend_Pdf_Exception
     */
    private function _parseIndirectObject()
    {
        $objNum = $this->readLexeme();
        if (!ctype_digit($objNum)) {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
        }

        $objKeyword = $this->readLexeme();
        if ($objKeyword != 'obj') {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
        }

        $objValue = $this->readElement();

        $nextLexeme = $this->readLexeme();

        if ($nextLexeme == 'endobj') {
            /**
             * Object is not generated by factory (thus it's not marked as modified object).
             * But factory is assigned to the object.
             */
            $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());

            foreach ($this->_elements as $element) {
                $element->setParentObject($obj);
            }

            return $obj;
        }

        /**
         * It's a stream object
         */
        if ($nextLexeme != 'stream') {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
        }

        if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
        }

        /**
         * References are automatically dereferenced at this moment.
         */
        $streamLength = $objValue->Length->value;

        /**
         * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
         * This restriction gives the possibility to recognize all cases exactly
         *
         * substr() is used instead of direct indexing so that a truncated
         * file ends up in the syntax error branch below.
         */
        if (substr($this->data, $this->offset, 2) === "\r\n") {
            $this->offset += 2;
        } else if (substr($this->data, $this->offset, 1) === "\n") {
            $this->offset++;
        } else {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
        }

        $dataOffset = $this->offset;

        // Jump over the stream body; its size is taken from the dictionary
        $this->offset += $streamLength;

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endstream') {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endobj') {
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $obj = new Zend_Pdf_Element_Object_Stream(substr($this->data,
                                                         $dataOffset,
                                                         $streamLength),
                                                  (int)$objNum,
                                                  (int)$genNum,
                                                  $this->_objFactory->resolve(),
                                                  $objValue);

        foreach ($this->_elements as $element) {
            $element->setParentObject($obj);
        }

        return $obj;
    }

    /**
     * Get length of source string
     *
     * @return integer
     */
    public function getLength()
    {
        return strlen($this->data);
    }

    /**
     * Get source string
     *
     * @return string
     */
    public function getString()
    {
        return $this->data;
    }

    /**
     * Parse integer value from a binary stream
     *
     * Reads $size bytes starting at $offset and interprets them as an
     * unsigned integer with the most significant byte first.
     *
     * @param string  $stream
     * @param integer $offset
     * @param integer $size
     * @return integer
     */
    public static function parseIntFromStream($stream, $offset, $size)
    {
        $value = 0;
        for ($count = 0; $count < $size; $count++) {
            $value = $value * 256 + ord($stream[$offset + $count]);
        }

        return $value;
    }

    /**
     * Set current context
     *
     * @param Zend_Pdf_Element_Reference_Context $context
     */
    public function setContext(Zend_Pdf_Element_Reference_Context $context)
    {
        $this->_context = $context;
    }

    /**
     * Object constructor
     *
     * Note: PHP duplicates string, which is sent by value, only if it's updated.
     * Thus we don't need to care about overhead
     *
     * @param string $source
     * @param Zend_Pdf_ElementFactory_Interface $factory
     */
    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
    {
        $this->data        = $source;
        $this->_objFactory = $factory;
    }
}