1 //========================================================================
5 // Copyright 1996-2003 Glyph & Cog, LLC
7 //========================================================================
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
29 #include "ErrorCodes.h"
32 //------------------------------------------------------------------------
34 #define xrefSearchSize 1024 // read this many bytes at end of file
35 // to look for 'startxref'
38 //------------------------------------------------------------------------
40 //------------------------------------------------------------------------
42 #define permPrint (1<<2)
43 #define permChange (1<<3)
44 #define permCopy (1<<4)
45 #define permNotes (1<<5)
46 #define defPermFlags 0xfffc
49 //------------------------------------------------------------------------
51 //------------------------------------------------------------------------
// Constructor declaration. The definition fetches object <objStrNumA>
// through <xref> and parses the objects it contains.
56 // Create an object stream, using object number <objStrNum>,
58 ObjectStream(XRef
*xref
, int objStrNumA
);
62 // Return the object number of this object stream.
// Inline accessor for the objStrNum member.
63 int getObjStrNum() { return objStrNum
; }
65 // Get the <objIdx>th object from this stream, which should be
66 // object number <objNum>, generation 0.
// The result is written through <obj>.  The definition initializes
// <obj> to null when <objIdx> is outside [0, nObjects) or when <objNum>
// differs from objNums[objIdx]; otherwise it copies objs[objIdx].
67 Object
*getObject(int objIdx
, int objNum
, Object
*obj
);
// Data members filled in by the constructor.
71 int objStrNum
; // object number of the object stream
72 int nObjects
; // number of objects in the stream
73 Object
*objs
; // the objects (length = nObjects)
74 int *objNums
; // the object numbers (length = nObjects)
// Constructor: fetches object <objStrNumA> (generation 0) through <xref>,
// reads the "N" and "First" entries of its stream dictionary, parses the
// header of (object number, offset) pairs, and then parses each contained
// object into objs[].
77 ObjectStream::ObjectStream(XRef
*xref
, int objStrNumA
) {
81 Object objStr
, obj1
, obj2
;
// remember which object stream this is (returned by getObjStrNum())
84 objStrNum
= objStrNumA
;
// the fetched object must be a stream
89 if (!xref
->fetch(objStrNum
, 0, &objStr
)->isStream()) {
// "N" must be an integer; it becomes the object count
93 if (!objStr
.streamGetDict()->lookup("N", &obj1
)->isInt()) {
97 nObjects
= obj1
.getInt();
// "First" must be an integer; it is used below as the header length
103 if (!objStr
.streamGetDict()->lookup("First", &obj1
)->isInt()) {
107 first
= obj1
.getInt();
// NOTE(review): nObjects is taken directly from the file and is used
// here as an array length and in nObjects * sizeof(int).  No range
// check (negative / overflow) is visible in this function -- confirm
// one exists before these allocations.
110 objs
= new Object
[nObjects
];
111 objNums
= (int *)gmalloc(nObjects
* sizeof(int));
112 offsets
= (int *)gmalloc(nObjects
* sizeof(int));
114 // parse the header: object numbers and offsets
115 objStr
.streamReset();
// the header is read through an EmbedStream constructed with
// (gTrue, first) -- presumably a length limit of <first> bytes; TODO confirm
117 str
= new EmbedStream(objStr
.getStream(), &obj1
, gTrue
, first
);
118 parser
= new Parser(xref
, new Lexer(xref
, str
));
// each header entry is two integers: object number, then offset
119 for (i
= 0; i
< nObjects
; ++i
) {
120 parser
->getObj(&obj1
);
121 parser
->getObj(&obj2
);
// both tokens must be integers
122 if (!obj1
.isInt() || !obj2
.isInt()) {
129 objNums
[i
] = obj1
.getInt();
130 offsets
[i
] = obj2
.getInt();
// consume whatever is left of the header stream
134 while (str
->getChar() != EOF
) ;
137 // skip to the first object - this shouldn't be necessary because
138 // the First key is supposed to be equal to offsets[0], but just in
// The loop discards one byte per iteration while i < offsets[0],
// starting from i = first.
// NOTE(review): offsets[0] is read here even when nObjects is 0 --
// confirm that case is rejected earlier.
140 for (i
= first
; i
< offsets
[0]; ++i
) {
141 objStr
.getStream()->getChar();
// parse the objects themselves, one EmbedStream + Parser per object
145 for (i
= 0; i
< nObjects
; ++i
) {
// the last object gets an EmbedStream built with (gFalse, 0); every
// other object gets one built with (gTrue, offsets[i+1] - offsets[i])
147 if (i
== nObjects
- 1) {
148 str
= new EmbedStream(objStr
.getStream(), &obj1
, gFalse
, 0);
// NOTE(review): the difference of two file-supplied offsets is passed
// on unchecked here -- verify it cannot be negative.
150 str
= new EmbedStream(objStr
.getStream(), &obj1
, gTrue
,
151 offsets
[i
+1] - offsets
[i
]);
153 parser
= new Parser(xref
, new Lexer(xref
, str
));
// parse directly into slot i of the objs array
154 parser
->getObj(&objs
[i
]);
// drain the rest of this object's embedded stream
155 while (str
->getChar() != EOF
) ;
// Destructor: walks all nObjects entries (presumably releasing each
// parsed object before the arrays are freed -- TODO confirm).
166 ObjectStream::~ObjectStream() {
170 for (i
= 0; i
< nObjects
; ++i
) {
// Look up the <objIdx>th object of this stream and write it to <obj>.
//   objIdx - index into objs[] / objNums[]
//   objNum - object number the caller expects at that index
//   obj    - output object
// Returns the result of obj->initNull() when the index is out of range
// or the object number does not match, otherwise the result of copying
// objs[objIdx] into <obj>.
178 Object
*ObjectStream::getObject(int objIdx
, int objNum
, Object
*obj
) {
// reject out-of-range indexes and mismatched object numbers; the range
// test comes first, so objNums[objIdx] is only read for a valid index
179 if (objIdx
< 0 || objIdx
>= nObjects
|| objNum
!= objNums
[objIdx
]) {
180 return obj
->initNull();
// hand back a copy of the stored object
182 return objs
[objIdx
].copy(obj
);
185 //------------------------------------------------------------------------
187 //------------------------------------------------------------------------
// Constructor: locates the xref data via getStartXref(), reads it with
// readXRef(), falls back to constructXRef() when something goes wrong,
// records the Root reference from the trailer dictionary and finally
// checks for encryption using the two supplied passwords.
// errCode is set to errDamaged when reconstruction fails and to
// errEncrypted when checkEncrypted() returns true.
189 XRef::XRef(BaseStream
*strA
, GString
*ownerPassword
, GString
*userPassword
) {
// base offset of the stream; added to file positions elsewhere in this class
203 start
= str
->getStart();
204 pos
= getStartXref();
206 // if there was a problem with the 'startxref' position, try to
207 // reconstruct the xref table
209 if (!(ok
= constructXRef())) {
210 errCode
= errDamaged
;
214 // read the xref table
// readXRef() updates pos itself; the loop runs until it returns false
216 while (readXRef(&pos
)) ;
218 // if there was a problem with the xref table,
219 // try to reconstruct it
221 if (!(ok
= constructXRef())) {
222 errCode
= errDamaged
;
228 // get the root dictionary (catalog) object
// dictLookupNF is used and the result is read with getRefNum/getRefGen,
// i.e. "Root" is handled as an indirect reference
229 trailerDict
.dictLookupNF("Root", &obj
);
231 rootNum
= obj
.getRefNum();
232 rootGen
= obj
.getRefGen();
// third reconstruction attempt (presumably for a missing/invalid Root
// entry -- the guarding condition is not visible here; TODO confirm)
236 if (!(ok
= constructXRef())) {
237 errCode
= errDamaged
;
242 // now set the trailer dictionary's xref pointer so we can fetch
243 // indirect objects from it
244 trailerDict
.getDict()->setXRef(this);
246 // check for encryption
247 #ifndef NO_DECRYPTION
// a true result from checkEncrypted() is reported as errEncrypted
250 if (checkEncrypted(ownerPassword
, userPassword
)) {
252 errCode
= errEncrypted
;
268 // Read the 'startxref' position.
// Reads up to xrefSearchSize bytes into a local buffer, searches it
// backwards for the keyword "startxref", and parses the number that
// follows it into lastXRefPos.
269 Guint
XRef::getStartXref() {
// one extra byte beyond xrefSearchSize (presumably for a terminating
// NUL so strncmp/strToUnsigned stop -- TODO confirm)
270 char buf
[xrefSearchSize
+1];
274 // read last xrefSearchSize bytes
275 str
->setPos(xrefSearchSize
, -1);
// n counts the bytes actually read; reading stops early on EOF
276 for (n
= 0; n
< xrefSearchSize
; ++n
) {
277 if ((c
= str
->getChar()) == EOF
) {
// scan backwards from the last position where the 9-character keyword
// could still fit
285 for (i
= n
- 9; i
>= 0; --i
) {
286 if (!strncmp(&buf
[i
], "startxref", 9)) {
// skip whitespace after the keyword
// NOTE(review): isspace() receives a plain char here; bytes >= 0x80
// may be negative on signed-char platforms -- confirm this is safe.
293 for (p
= &buf
[i
+9]; isspace(*p
); ++p
) ;
// parse the decimal offset and remember it
294 lastXRefPos
= strToUnsigned(p
);
299 // Read one xref table section. Also reads the associated trailer
300 // dictionary, and returns the prev pointer (if any).
// <pos> is in/out: on entry the offset of the section to read; the
// helpers called below may overwrite it with the offset of the previous
// section.  The value computed in <more> comes from readXRefTable() or
// readXRefStream().
301 GBool
XRef::readXRef(Guint
*pos
) {
306 // start up a parser, parse one token
// the parser reads from a sub-stream beginning at start + *pos
308 parser
= new Parser(NULL
,
310 str
->makeSubStream(start
+ *pos
, gFalse
, 0, &obj
)));
311 parser
->getObj(&obj
);
313 // parse an old-style xref table
314 if (obj
.isCmd("xref")) {
316 more
= readXRefTable(parser
, pos
);
318 // parse an xref stream
// an xref stream starts like an indirect object: "<int> <int> obj"
// followed by a stream object
319 } else if (obj
.isInt()) {
// second integer of the object header
321 if (!parser
->getObj(&obj
)->isInt()) {
// the "obj" keyword
325 if (!parser
->getObj(&obj
)->isCmd("obj")) {
// the object itself must be a stream
329 if (!parser
->getObj(&obj
)->isStream()) {
332 more
= readXRefStream(obj
.getStream(), pos
);
// Parse an old-style xref table using <parser>, storing its entries in
// entries[] (growing the array as needed), then read the trailer
// dictionary that follows.  <pos> receives the "Prev" offset when the
// trailer has one.  The first trailer dictionary seen is saved in
// trailerDict, and an integer "XRefStm" entry is read into pos2.
349 GBool
XRef::readXRefTable(Parser
*parser
, Guint
*pos
) {
354 int first
, n
, newSize
, i
;
// read the next token; "trailer" marks the end of the subsections
357 parser
->getObj(&obj
);
358 if (obj
.isCmd("trailer")) {
// subsection header: first object number ...
365 first
= obj
.getInt();
// ... followed by an integer (presumably the entry count n; the
// assignment is not visible here -- TODO confirm)
367 if (!parser
->getObj(&obj
)->isInt()) {
// grow entries[] when the subsection extends past the current size
// NOTE(review): first and n come from the file; first + n can overflow
// and a negative first would index before entries[] in the loop below.
// No validation is visible here -- confirm.
372 if (first
+ n
> size
) {
// growth starts at twice the current size, or 1024 when empty
373 for (newSize
= size
? 2 * size
: 1024;
376 entries
= (XRefEntry
*)grealloc(entries
, newSize
* sizeof(XRefEntry
));
// new slots start out free, with offset 0xffffffff as the "unset" marker
377 for (i
= size
; i
< newSize
; ++i
) {
378 entries
[i
].offset
= 0xffffffff;
379 entries
[i
].type
= xrefEntryFree
;
// read the n entries of this subsection
383 for (i
= first
; i
< first
+ n
; ++i
) {
// field 1: offset
384 if (!parser
->getObj(&obj
)->isInt()) {
387 entry
.offset
= (Guint
)obj
.getInt();
// field 2: generation number
389 if (!parser
->getObj(&obj
)->isInt()) {
392 entry
.gen
= obj
.getInt();
// field 3: keyword "n" (in use) or "f" (free)
394 parser
->getObj(&obj
);
395 if (obj
.isCmd("n")) {
396 entry
.type
= xrefEntryUncompressed
;
397 } else if (obj
.isCmd("f")) {
398 entry
.type
= xrefEntryFree
;
// only slots still holding the "unset" marker are considered here
// (presumably so an entry read earlier is not overwritten -- TODO confirm)
403 if (entries
[i
].offset
== 0xffffffff) {
405 // PDF files of patents from the IBM Intellectual Property
406 // Network have a bug: the xref table claims to start at 1
// workaround: when entry 1 looks like the usual free-list head
// (offset 0, generation 65535, free), move it to slot 0
408 if (i
== 1 && first
== 1 &&
409 entries
[1].offset
== 0 && entries
[1].gen
== 65535 &&
410 entries
[1].type
== xrefEntryFree
) {
412 entries
[0] = entries
[1];
413 entries
[1].offset
= 0xffffffff;
419 // read the trailer dictionary
420 if (!parser
->getObj(&obj
)->isDict()) {
424 // get the 'Prev' pointer
425 obj
.getDict()->lookupNF("Prev", &obj2
);
// Prev value taken as an integer offset
427 *pos
= (Guint
)obj2
.getInt();
429 } else if (obj2
.isRef()) {
430 // certain buggy PDF generators generate "/Prev NNN 0 R" instead
// in that case the reference's object number is used as the offset
432 *pos
= (Guint
)obj2
.getRefNum();
439 // save the first trailer dictionary
440 if (trailerDict
.isNone()) {
441 obj
.copy(&trailerDict
);
444 // check for an 'XRefStm' key
445 if (obj
.getDict()->lookup("XRefStm", &obj2
)->isInt()) {
446 pos2
= obj2
.getInt();
// Parse a cross-reference stream.  Reads "Size", "W", "Index" and "Prev"
// from the stream dictionary, hands each (first, n) section to
// readXRefStreamSection(), stores the "Prev" offset in <pos>, and keeps
// the stream dictionary as trailerDict if none has been saved yet.
463 GBool
XRef::readXRefStream(Stream
*xrefStr
, Guint
*pos
) {
467 Object obj
, obj2
, idx
;
468 int newSize
, first
, n
, i
;
470 dict
= xrefStr
->getDict();
// "Size" must be an integer
472 if (!dict
->lookupNF("Size", &obj
)->isInt()) {
475 newSize
= obj
.getInt();
// grow entries[] to the declared size
// NOTE(review): newSize comes straight from the file; a huge value can
// overflow newSize * sizeof(XRefEntry).  No check is visible -- confirm.
477 if (newSize
> size
) {
478 entries
= (XRefEntry
*)grealloc(entries
, newSize
* sizeof(XRefEntry
));
// new slots start out free, with offset 0xffffffff as the "unset" marker
479 for (i
= size
; i
< newSize
; ++i
) {
480 entries
[i
].offset
= 0xffffffff;
481 entries
[i
].type
= xrefEntryFree
;
// "W" must be an array with at least three elements
486 if (!dict
->lookupNF("W", &obj
)->isArray() ||
487 obj
.arrayGetLength() < 3) {
// copy the three field widths into w[]; each must be an integer
// NOTE(review): no range check on the widths is visible here.
490 for (i
= 0; i
< 3; ++i
) {
491 if (!obj
.arrayGet(i
, &obj2
)->isInt()) {
495 w
[i
] = obj2
.getInt();
// "Index" is walked below as pairs (first object number, count)
501 dict
->lookupNF("Index", &idx
);
503 for (i
= 0; i
+1 < idx
.arrayGetLength(); i
+= 2) {
// first element of the pair
504 if (!idx
.arrayGet(i
, &obj
)->isInt()) {
508 first
= obj
.getInt();
// second element of the pair
510 if (!idx
.arrayGet(i
+1, &obj
)->isInt()) {
516 if (!readXRefStreamSection(xrefStr
, w
, first
, n
)) {
// alternative path: a single section covering entries 0 .. size-1
// (presumably taken when "Index" is not an array -- TODO confirm)
522 if (!readXRefStreamSection(xrefStr
, w
, 0, size
)) {
// offset of the previous xref section, if any
529 dict
->lookupNF("Prev", &obj
);
531 *pos
= (Guint
)obj
.getInt();
// keep the first dictionary seen as the trailer dictionary
537 if (trailerDict
.isNone()) {
538 trailerDict
.initDict(dict
);
// Read <n> entries, for object numbers <first> .. <first>+<n>-1, from
// the cross-reference stream <xrefStr>.  Each entry consists of three
// big-endian fields whose byte widths are w[0], w[1] and w[2]: type,
// offset and generation.  entries[] is grown when needed.
550 GBool
XRef::readXRefStreamSection(Stream
*xrefStr
, int *w
, int first
, int n
) {
552 int type
, gen
, c
, newSize
, i
, j
;
// grow entries[] when the section extends past the current size
// NOTE(review): first + n can overflow and a negative first is not
// rejected in the visible code -- confirm validation exists.
554 if (first
+ n
> size
) {
// growth starts at twice the current size, or 1024 when empty
555 for (newSize
= size
? 2 * size
: 1024;
558 entries
= (XRefEntry
*)grealloc(entries
, newSize
* sizeof(XRefEntry
));
// new slots start out free, with offset 0xffffffff as the "unset" marker
559 for (i
= size
; i
< newSize
; ++i
) {
560 entries
[i
].offset
= 0xffffffff;
561 entries
[i
].type
= xrefEntryFree
;
565 for (i
= first
; i
< first
+ n
; ++i
) {
// field 1: entry type, w[0] bytes, most significant byte first
569 for (type
= 0, j
= 0; j
< w
[0]; ++j
) {
// the stream ran out of data in the middle of an entry
570 if ((c
= xrefStr
->getChar()) == EOF
) {
573 type
= (type
<< 8) + c
;
// field 2: offset, w[1] bytes
576 for (offset
= 0, j
= 0; j
< w
[1]; ++j
) {
577 if ((c
= xrefStr
->getChar()) == EOF
) {
580 offset
= (offset
<< 8) + c
;
// field 3: generation, w[2] bytes
582 for (gen
= 0, j
= 0; j
< w
[2]; ++j
) {
583 if ((c
= xrefStr
->getChar()) == EOF
) {
586 gen
= (gen
<< 8) + c
;
// The three groups below store the same offset/gen and differ only in
// the entry type (free, uncompressed, compressed); presumably they are
// selected by the value of <type> -- TODO confirm.
590 entries
[i
].offset
= offset
;
591 entries
[i
].gen
= gen
;
592 entries
[i
].type
= xrefEntryFree
;
595 entries
[i
].offset
= offset
;
596 entries
[i
].gen
= gen
;
597 entries
[i
].type
= xrefEntryUncompressed
;
600 entries
[i
].offset
= offset
;
601 entries
[i
].gen
= gen
;
602 entries
[i
].type
= xrefEntryCompressed
;
612 // Attempt to construct an xref table for a damaged file.
// Scans the file line by line, recognising three kinds of line:
//   "trailer ..."       - parsed as a trailer dictionary; its Root
//                         reference is recorded in rootNum/rootGen
//   "<num> <gen> obj"   - recorded as an uncompressed entry in entries[]
//   "endstream"         - its position is appended to streamEnds[]
// Emits "Couldn't find trailer dictionary" through error() on failure.
613 GBool
XRef::constructXRef() {
615 Object newTrailerDict
, obj
;
629 error(0, "PDF file is damaged - attempting to reconstruct xref table...");
// start with an empty streamEnds list
631 streamEndsLen
= streamEndsSize
= 0;
// read the next line (buffer limit 256)
636 if (!str
->getLine(buf
, 256)) {
641 // got trailer dictionary
642 if (!strncmp(p
, "trailer", 7)) {
// parse what follows the 7-character keyword
644 parser
= new Parser(NULL
,
646 str
->makeSubStream(start
+ pos
+ 7, gFalse
, 0, &obj
)));
647 parser
->getObj(&newTrailerDict
);
648 if (newTrailerDict
.isDict()) {
// record the catalog reference
649 newTrailerDict
.dictLookupNF("Root", &obj
);
651 rootNum
= obj
.getRefNum();
652 rootGen
= obj
.getRefGen();
// a previously stored trailer dictionary is handled first (presumably
// freed) before the new one is copied in -- TODO confirm
653 if (!trailerDict
.isNone()) {
656 newTrailerDict
.copy(&trailerDict
);
661 newTrailerDict
.free();
// a line beginning with a digit may be an object header "<num> <gen> obj"
665 } else if (isdigit(*p
)) {
// the four loops below consume, in order: digits, whitespace, digits,
// whitespace (presumably accumulating num and gen -- TODO confirm)
669 } while (*p
&& isdigit(*p
));
673 } while (*p
&& isspace(*p
));
678 } while (*p
&& isdigit(*p
));
682 } while (*p
&& isspace(*p
));
683 if (!strncmp(p
, "obj", 3)) {
// round the required size (num + 1) up to a multiple of 256
// NOTE(review): num is parsed from the file; no upper bound is visible
// here, so this addition and the multiplication below may overflow.
685 newSize
= (num
+ 1 + 255) & ~255;
686 entries
= (XRefEntry
*)
687 grealloc(entries
, newSize
* sizeof(XRefEntry
));
// new slots start out free, with offset 0xffffffff as the "unset" marker
688 for (i
= size
; i
< newSize
; ++i
) {
689 entries
[i
].offset
= 0xffffffff;
690 entries
[i
].type
= xrefEntryFree
;
// record this object unless the slot already holds an in-use entry
// with a higher generation number
694 if (entries
[num
].type
== xrefEntryFree
||
695 gen
>= entries
[num
].gen
) {
// stored offsets are relative to <start>
696 entries
[num
].offset
= pos
- start
;
697 entries
[num
].gen
= gen
;
698 entries
[num
].type
= xrefEntryUncompressed
;
// remember where each "endstream" keyword was found
705 } else if (!strncmp(p
, "endstream", 9)) {
// grow the list in steps of 64 entries
706 if (streamEndsLen
== streamEndsSize
) {
707 streamEndsSize
+= 64;
// NOTE(review): the element type is Guint but the size is computed
// with sizeof(int); same size on common platforms, but inconsistent.
708 streamEnds
= (Guint
*)grealloc(streamEnds
,
709 streamEndsSize
* sizeof(int));
711 streamEnds
[streamEndsLen
++] = pos
;
718 error(-1, "Couldn't find trailer dictionary");
722 #ifndef NO_DECRYPTION
// Decryption-enabled variant.  Inspects the trailer's "Encrypt"
// dictionary; for the "Standard" security handler it reads V, R, Length,
// O, U and P plus the trailer "ID", and asks Decrypt::makeFileKey() to
// derive fileKey from the supplied passwords.  Problems are reported
// through error().  The caller treats a true return value as
// errEncrypted.
723 GBool
XRef::checkEncrypted(GString
*ownerPassword
, GString
*userPassword
) {
724 Object encrypt
, filterObj
, versionObj
, revisionObj
, lengthObj
;
725 Object ownerKey
, userKey
, permissions
, fileID
, fileID1
;
// defaults used when the file is not encrypted or the info is unusable
730 encVersion
= encRevision
= 0;
733 permFlags
= defPermFlags
;
734 ownerPasswordOk
= gFalse
;
735 trailerDict
.dictLookup("Encrypt", &encrypt
);
// the result goes into the local encrypted1 first; the member
// "encrypted" is only assigned at the very end
736 if ((encrypted1
= encrypt
.isDict())) {
738 encrypt
.dictLookup("Filter", &filterObj
);
// only the "Standard" security handler is handled
739 if (filterObj
.isName("Standard")) {
740 encrypt
.dictLookup("V", &versionObj
);
741 encrypt
.dictLookup("R", &revisionObj
);
742 encrypt
.dictLookup("Length", &lengthObj
);
743 encrypt
.dictLookup("O", &ownerKey
);
744 encrypt
.dictLookup("U", &userKey
);
745 encrypt
.dictLookup("P", &permissions
);
746 trailerDict
.dictLookup("ID", &fileID
);
// sanity-check types; the O and U strings must be exactly 32 bytes long
747 if (versionObj
.isInt() &&
748 revisionObj
.isInt() &&
749 ownerKey
.isString() && ownerKey
.getString()->getLength() == 32 &&
750 userKey
.isString() && userKey
.getString()->getLength() == 32 &&
751 permissions
.isInt() &&
753 encVersion
= versionObj
.getInt();
754 encRevision
= revisionObj
.getInt();
// "Length" is divided by 8 to give keyLength
// NOTE(review): no upper bound on keyLength is visible here -- confirm
// it cannot exceed the capacity of fileKey.
755 if (lengthObj
.isInt()) {
756 keyLength
= lengthObj
.getInt() / 8;
760 permFlags
= permissions
.getInt();
// supported combinations: version 1..2 with revision 2..3
761 if (encVersion
>= 1 && encVersion
<= 2 &&
762 encRevision
>= 2 && encRevision
<= 3) {
// the first element of the ID array is passed to makeFileKey()
763 fileID
.arrayGet(0, &fileID1
);
764 if (fileID1
.isString()) {
765 if (Decrypt::makeFileKey(encVersion
, encRevision
, keyLength
,
766 ownerKey
.getString(), userKey
.getString(),
767 permFlags
, fileID1
.getString(),
768 ownerPassword
, userPassword
, fileKey
,
// an owner password was supplied but was not accepted
770 if (ownerPassword
&& !ownerPasswordOk
) {
771 error(-1, "Incorrect owner password");
775 error(-1, "Incorrect password");
778 error(-1, "Weird encryption info");
782 error(-1, "Unsupported version/revision (%d/%d) of Standard security handler",
783 encVersion
, encRevision
);
786 error(-1, "Weird encryption info");
// any other handler: report its name, or "???" when Filter is not a name
796 error(-1, "Unknown security handler '%s'",
797 filterObj
.isName() ? filterObj
.getName() : "???");
803 // this flag has to be set *after* we read the O/U/P strings
804 encrypted
= encrypted1
;
// Variant of checkEncrypted() that only detects encryption: it sets
// <encrypted> when the trailer has a non-null "Encrypt" entry and prints
// a two-line message.  The passwords are not used in the visible code.
809 GBool
XRef::checkEncrypted(GString
*ownerPassword
, GString
*userPassword
) {
813 trailerDict
.dictLookup("Encrypt", &obj
);
// any non-null Encrypt entry counts as "encrypted"
814 if ((encrypted
= !obj
.isNull())) {
815 error(-1, "PDF file is encrypted and this version of the Xpdf tools");
816 error(-1, "was built without decryption support.");
// Permission query for printing.  With decryption support compiled in,
// the result is true when the owner password was accepted and
// <ignoreOwnerPW> is false, or when the permPrint bit is set in
// permFlags.
823 GBool
XRef::okToPrint(GBool ignoreOwnerPW
) {
824 #ifndef NO_DECRYPTION
825 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permPrint
);
// Permission query for modifying the document.  With decryption support
// compiled in, the result is true when the owner password was accepted
// and <ignoreOwnerPW> is false, or when the permChange bit is set in
// permFlags.
831 GBool
XRef::okToChange(GBool ignoreOwnerPW
) {
832 #ifndef NO_DECRYPTION
833 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permChange
);
// Permission query for copying content.  With decryption support
// compiled in, the result is true when the owner password was accepted
// and <ignoreOwnerPW> is false, or when the permCopy bit is set in
// permFlags.
839 GBool
XRef::okToCopy(GBool ignoreOwnerPW
) {
840 #ifndef NO_DECRYPTION
841 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permCopy
);
// Permission query for adding notes.  With decryption support compiled
// in, the result is true when the owner password was accepted and
// <ignoreOwnerPW> is false, or when the permNotes bit is set in
// permFlags.
847 GBool
XRef::okToAddNotes(GBool ignoreOwnerPW
) {
848 #ifndef NO_DECRYPTION
849 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permNotes
);
// Fetch indirect object <num> <gen> into <obj>.
// Uncompressed entries are parsed from the file at start + offset after
// verifying the "<num> <gen> obj" header; compressed entries are looked
// up in an ObjectStream.  The final visible statement returns
// obj->initNull().
855 Object
*XRef::fetch(int num
, int gen
, Object
*obj
) {
858 Object obj1
, obj2
, obj3
;
860 // check for bogus ref - this can happen in corrupted PDF files
861 if (num
< 0 || num
>= size
) {
// entry stored directly in the file
868 case xrefEntryUncompressed
:
// parse from a sub-stream positioned at the entry's offset
873 parser
= new Parser(this,
875 str
->makeSubStream(start
+ e
->offset
, gFalse
, 0, &obj1
)));
// read the three header tokens: number, generation, "obj"
876 parser
->getObj(&obj1
);
877 parser
->getObj(&obj2
);
878 parser
->getObj(&obj3
);
// the header must match the requested number and generation exactly
879 if (!obj1
.isInt() || obj1
.getInt() != num
||
880 !obj2
.isInt() || obj2
.getInt() != gen
||
881 !obj3
.isCmd("obj")) {
884 #ifndef NO_DECRYPTION
// the file key is passed only when the document is encrypted,
// otherwise a NULL key is passed
885 parser
->getObj(obj
, encrypted
? fileKey
: (Guchar
*)NULL
, keyLength
,
// entry stored inside an object stream
896 case xrefEntryCompressed
:
// the current ObjectStream is reused when its number equals e->offset,
// which for this entry type is compared against an object stream number
900 if (!objStr
|| objStr
->getObjStrNum() != (int)e
->offset
) {
904 objStr
= new ObjectStream(this, e
->offset
);
// for this entry type e->gen is passed as the index within the stream
906 objStr
->getObject(e
->gen
, num
, obj
);
916 return obj
->initNull();
// Look up the "Info" entry of the trailer dictionary into <obj> using
// dictLookup(), and return that call's result.
919 Object
*XRef::getDocInfo(Object
*obj
) {
920 return trailerDict
.dictLookup("Info", obj
);
923 // Added for the pdftex project.
// Same lookup as getDocInfo(), but through dictLookupNF() (presumably
// "no fetch", i.e. an indirect reference is returned as-is -- TODO confirm).
924 Object
*XRef::getDocInfoNF(Object
*obj
) {
925 return trailerDict
.dictLookupNF("Info", obj
);
// Find the recorded stream end for a stream starting at <streamStart>
// and store it in <streamEnd>.  streamEnds[] is filled by constructXRef();
// the invariant comment below implies it is searched as a sorted array.
928 GBool
XRef::getStreamEnd(Guint streamStart
, Guint
*streamEnd
) {
// nothing to find when the list is empty or every recorded end lies
// before streamStart
931 if (streamEndsLen
== 0 ||
932 streamStart
> streamEnds
[streamEndsLen
- 1]) {
// upper bound of the search range
937 b
= streamEndsLen
- 1;
938 // invariant: streamEnds[a] < streamStart <= streamEnds[b]
// compare against the probe index m
941 if (streamStart
<= streamEnds
[m
]) {
// by the invariant, streamEnds[b] is the first end >= streamStart
947 *streamEnd
= streamEnds
[b
];
951 Guint
XRef::strToUnsigned(char *s
) {
957 for (p
= s
, i
= 0; *p
&& isdigit(*p
) && i
< 10; ++p
, ++i
) {
958 x
= 10 * x
+ (*p
- '0');