1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include <vcl/filter/pdfdocument.hxx>
16 #include <com/sun/star/uno/Sequence.hxx>
18 #include <comphelper/processfactory.hxx>
19 #include <comphelper/scopeguard.hxx>
20 #include <comphelper/string.hxx>
21 #include <filter/msfilter/mscodec.hxx>
22 #include <rtl/strbuf.hxx>
23 #include <rtl/string.hxx>
24 #include <sal/log.hxx>
25 #include <sal/types.h>
26 #include <sax/tools/converter.hxx>
27 #include <tools/zcodec.hxx>
28 #include <unotools/calendarwrapper.hxx>
29 #include <unotools/datetime.hxx>
30 #include <vcl/pdfwriter.hxx>
31 #include <xmloff/xmluconv.hxx>
32 #include <o3tl/make_unique.hxx>
34 using namespace com::sun::star
;
// Number of characters reserved for the hex-encoded signature in the /Contents
// placeholder: WriteSignatureObject() pads the placeholder with this many '0'
// characters, and Sign() uses it to find the first byte after the placeholder.
41 const int MAX_SIGNATURE_CONTENT_LENGTH
= 50000;
// Forward declaration; the class itself is declared further down in this file.
43 class PDFTrailerElement
;
45 /// A one-liner comment.
// Tokenize() creates one of these with the tokenizing document and then calls Read() on it.
46 class PDFCommentElement
: public PDFElement
// Takes the document the comment is read for.
52 explicit PDFCommentElement(PDFDocument
& rDoc
);
// Reads the element from rStream; Tokenize() treats a false return as a read failure.
53 bool Read(SvStream
& rStream
) override
;
// Forward declaration; pointers to this type are passed around by the PDFDocument
// methods below (e.g. WriteCatalogObject() and WriteXRef()).
56 class PDFReferenceElement
;
58 /// End of a dictionary: '>>'.
59 class PDFEndDictionaryElement
: public PDFElement
61 /// Offset before the '>>' token.
62 sal_uInt64 m_nLocation
= 0;
// Default-constructible: Tokenize() creates it without arguments.
64 PDFEndDictionaryElement();
// Reads the element from rStream; Tokenize() treats a false return as a read failure.
65 bool Read(SvStream
& rStream
) override
;
// Presumably the accessor for m_nLocation; the definition is not part of this listing.
66 sal_uInt64
GetLocation() const;
69 /// End of a stream: 'endstream' keyword.
70 class PDFEndStreamElement
: public PDFElement
// PDFElement override that reads this element from rStream and reports success as a bool.
73 bool Read(SvStream
& rStream
) override
;
76 /// End of an object: 'endobj' keyword.
77 class PDFEndObjectElement
: public PDFElement
// PDFElement override that reads this element from rStream and reports success as a bool.
80 bool Read(SvStream
& rStream
) override
;
83 /// End of an array: ']'.
84 class PDFEndArrayElement
: public PDFElement
86 /// Location before the ']' token.
87 sal_uInt64 m_nOffset
= 0;
// Reads the element from rStream; Tokenize() treats a false return as a read failure.
90 bool Read(SvStream
& rStream
) override
;
// Presumably the accessor for m_nOffset; the definition is not part of this listing.
91 sal_uInt64
GetOffset() const;
94 /// Boolean object: a 'true' or a 'false'.
95 class PDFBooleanElement
: public PDFElement
// Explicit so that a plain bool is never converted to an element implicitly.
98 explicit PDFBooleanElement(bool bValue
);
// PDFElement override that reads this element from rStream and reports success as a bool.
99 bool Read(SvStream
& rStream
) override
;
102 /// Null object: the 'null' singleton.
103 class PDFNullElement
: public PDFElement
// PDFElement override that reads this element from rStream and reports success as a bool.
106 bool Read(SvStream
& rStream
) override
;
109 /// The trailer singleton is at the end of the doc.
110 class PDFTrailerElement
: public PDFElement
// Maps dictionary key names to element pointers; presumably the backing store of Lookup().
113 std::map
<OString
, PDFElement
*> m_aDictionary
;
114 /// Location of the end of the trailer token.
115 sal_uInt64 m_nOffset
= 0;
// Takes the document the trailer belongs to.
118 explicit PDFTrailerElement(PDFDocument
& rDoc
);
// PDFElement override that reads this element from rStream and reports success as a bool.
119 bool Read(SvStream
& rStream
) override
;
// Looks up a trailer dictionary entry by key; PDFDocument calls it with "Root", "Info"
// and "ID" and dynamic_casts the result, so callers must expect any element type or null.
120 PDFElement
* Lookup(const OString
& rDictionaryKey
);
// Presumably the accessor for m_nOffset; the definition is not part of this listing.
121 sal_uInt64
GetLocation() const;
// Default constructor: a new entry starts as NOT_COMPRESSED with generation number 0.
// NOTE(review): the initializers of the remaining members and the (empty?) body are not
// visible in this listing.
124 XRefEntry::XRefEntry()
125 : m_eType(XRefEntryType::NOT_COMPRESSED
),
127 m_nGenerationNumber(0),
// Default constructor: the trailer and xref-stream pointers start out null.
// Tokenize() sets m_pXRefStream when a startxref offset matches a known object offset.
132 PDFDocument::PDFDocument()
133 : m_pTrailer(nullptr),
134 m_pXRefStream(nullptr)
138 bool PDFDocument::RemoveSignature(size_t nPosition
)
140 std::vector
<PDFObjectElement
*> aSignatures
= GetSignatureWidgets();
141 if (nPosition
>= aSignatures
.size())
143 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
147 if (aSignatures
.size() != m_aEOFs
.size() - 1)
149 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures and incremental updates");
153 // The EOF offset is the end of the original file, without the signature at
155 m_aEditBuffer
.Seek(m_aEOFs
[nPosition
]);
156 // Drop all bytes after the current position.
157 m_aEditBuffer
.SetStreamSize(m_aEditBuffer
.Tell() + 1);
159 return m_aEditBuffer
.good();
// Scans the "T" entries of the existing signature widgets for values that start with
// "Signature" and tracks the largest number that follows the prefix.
// NOTE(review): the declaration of nRet, the statements taken when a widget is skipped and
// the return statement are not visible in this listing; WriteAnnotObject() uses the result
// as the numeric suffix of the new "/T(Signature...)" value.
162 sal_uInt32
PDFDocument::GetNextSignature()
165 for (const auto& pSignature
: GetSignatureWidgets())
// Only widgets whose "T" entry is a literal string are of interest.
167 auto pT
= dynamic_cast<PDFLiteralStringElement
*>(pSignature
->Lookup("T"));
171 const OString
& rValue
= pT
->GetValue();
172 const OString aPrefix
= "Signature";
173 if (!rValue
.startsWith(aPrefix
))
// Parse what follows the prefix as an unsigned number and keep the maximum seen so far.
176 nRet
= std::max(nRet
, rValue
.copy(aPrefix
.getLength()).toUInt32());
182 sal_Int32
PDFDocument::WriteSignatureObject(const OUString
& rDescription
, bool bAdES
, sal_uInt64
& rLastByteRangeOffset
, sal_Int64
& rContentOffset
)
184 // Write signature object.
185 sal_Int32 nSignatureId
= m_aXRef
.size();
186 XRefEntry aSignatureEntry
;
187 aSignatureEntry
.m_nOffset
= m_aEditBuffer
.Tell();
188 aSignatureEntry
.m_bDirty
= true;
189 m_aXRef
[nSignatureId
] = aSignatureEntry
;
190 OStringBuffer aSigBuffer
;
191 aSigBuffer
.append(nSignatureId
);
192 aSigBuffer
.append(" 0 obj\n");
193 aSigBuffer
.append("<</Contents <");
194 rContentOffset
= aSignatureEntry
.m_nOffset
+ aSigBuffer
.getLength();
195 // Reserve space for the PKCS#7 object.
196 OStringBuffer
aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH
);
197 comphelper::string::padToLength(aContentFiller
, MAX_SIGNATURE_CONTENT_LENGTH
, '0');
198 aSigBuffer
.append(aContentFiller
.makeStringAndClear());
199 aSigBuffer
.append(">\n/Type/Sig/SubFilter");
201 aSigBuffer
.append("/ETSI.CAdES.detached");
203 aSigBuffer
.append("/adbe.pkcs7.detached");
206 aSigBuffer
.append(" /M (");
207 aSigBuffer
.append(vcl::PDFWriter::GetDateTime());
208 aSigBuffer
.append(")");
210 // Byte range: we can write offset1-length1 and offset2 right now, will
211 // write length2 later.
212 aSigBuffer
.append(" /ByteRange [ 0 ");
213 // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
214 aSigBuffer
.append(rContentOffset
- 1);
215 aSigBuffer
.append(" ");
216 aSigBuffer
.append(rContentOffset
+ MAX_SIGNATURE_CONTENT_LENGTH
+ 1);
217 aSigBuffer
.append(" ");
218 rLastByteRangeOffset
= aSignatureEntry
.m_nOffset
+ aSigBuffer
.getLength();
219 // We don't know how many bytes we need for the last ByteRange value, this
221 OStringBuffer aByteRangeFiller
;
222 comphelper::string::padToLength(aByteRangeFiller
, 100, ' ');
223 aSigBuffer
.append(aByteRangeFiller
.makeStringAndClear());
224 // Finish the Sig obj.
225 aSigBuffer
.append(" /Filter/Adobe.PPKMS");
227 if (!rDescription
.isEmpty())
229 aSigBuffer
.append("/Reason<");
230 vcl::PDFWriter::AppendUnicodeTextString(rDescription
, aSigBuffer
);
231 aSigBuffer
.append(">");
234 aSigBuffer
.append(" >>\nendobj\n\n");
235 m_aEditBuffer
.WriteOString(aSigBuffer
.toString());
240 sal_Int32
PDFDocument::WriteAppearanceObject()
242 // Write appearance object.
243 sal_Int32 nAppearanceId
= m_aXRef
.size();
244 XRefEntry aAppearanceEntry
;
245 aAppearanceEntry
.m_nOffset
= m_aEditBuffer
.Tell();
246 aAppearanceEntry
.m_bDirty
= true;
247 m_aXRef
[nAppearanceId
] = aAppearanceEntry
;
248 m_aEditBuffer
.WriteUInt32AsString(nAppearanceId
);
249 m_aEditBuffer
.WriteCharPtr(" 0 obj\n");
250 m_aEditBuffer
.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
251 m_aEditBuffer
.WriteCharPtr("/BBox[0 0 0 0]\n/Length 0\n>>\n");
252 m_aEditBuffer
.WriteCharPtr("stream\n\nendstream\nendobj\n\n");
254 return nAppearanceId
;
257 sal_Int32
PDFDocument::WriteAnnotObject(PDFObjectElement
& rFirstPage
, sal_Int32 nSignatureId
, sal_Int32 nAppearanceId
)
259 // Decide what identifier to use for the new signature.
260 sal_uInt32 nNextSignature
= GetNextSignature();
262 // Write the Annot object, references nSignatureId and nAppearanceId.
263 sal_Int32 nAnnotId
= m_aXRef
.size();
264 XRefEntry aAnnotEntry
;
265 aAnnotEntry
.m_nOffset
= m_aEditBuffer
.Tell();
266 aAnnotEntry
.m_bDirty
= true;
267 m_aXRef
[nAnnotId
] = aAnnotEntry
;
268 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
269 m_aEditBuffer
.WriteCharPtr(" 0 obj\n");
270 m_aEditBuffer
.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
271 m_aEditBuffer
.WriteCharPtr("/Rect[0 0 0 0]\n");
272 m_aEditBuffer
.WriteCharPtr("/FT/Sig\n");
273 m_aEditBuffer
.WriteCharPtr("/P ");
274 m_aEditBuffer
.WriteUInt32AsString(rFirstPage
.GetObjectValue());
275 m_aEditBuffer
.WriteCharPtr(" 0 R\n");
276 m_aEditBuffer
.WriteCharPtr("/T(Signature");
277 m_aEditBuffer
.WriteUInt32AsString(nNextSignature
);
278 m_aEditBuffer
.WriteCharPtr(")\n");
279 m_aEditBuffer
.WriteCharPtr("/V ");
280 m_aEditBuffer
.WriteUInt32AsString(nSignatureId
);
281 m_aEditBuffer
.WriteCharPtr(" 0 R\n");
282 m_aEditBuffer
.WriteCharPtr("/DV ");
283 m_aEditBuffer
.WriteUInt32AsString(nSignatureId
);
284 m_aEditBuffer
.WriteCharPtr(" 0 R\n");
285 m_aEditBuffer
.WriteCharPtr("/AP<<\n/N ");
286 m_aEditBuffer
.WriteUInt32AsString(nAppearanceId
);
287 m_aEditBuffer
.WriteCharPtr(" 0 R\n>>\n");
288 m_aEditBuffer
.WriteCharPtr(">>\nendobj\n\n");
// Writes updated objects to the edit buffer so that the first page's annotations include a
// reference to object nAnnotId, and marks every rewritten object dirty in m_aXRef.
// Two shapes of the page's "Annots" entry are handled: an indirect reference to an array
// object (that array object is rewritten) and a missing or inline array (the page object
// itself is rewritten from the bytes already in the edit buffer).
// Returns bool; Sign() treats false as failure.
// NOTE(review): several branch, brace and return lines are not visible in this listing, so
// the exact nesting of the sections below cannot be confirmed from here.
293 bool PDFDocument::WritePageObject(PDFObjectElement
& rFirstPage
, sal_Int32 nAnnotId
)
295 PDFElement
* pAnnots
= rFirstPage
.Lookup("Annots");
296 auto pAnnotsReference
= dynamic_cast<PDFReferenceElement
*>(pAnnots
);
297 if (pAnnotsReference
)
299 // Write the updated Annots key of the Page object.
300 PDFObjectElement
* pAnnotsObject
= pAnnotsReference
->LookupObject();
303 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
// The rewritten array object lives at the current write position as a plain
// (not compressed) object with generation 0.
307 sal_uInt32 nAnnotsId
= pAnnotsObject
->GetObjectValue();
308 m_aXRef
[nAnnotsId
].m_eType
= XRefEntryType::NOT_COMPRESSED
;
309 m_aXRef
[nAnnotsId
].m_nOffset
= m_aEditBuffer
.Tell();
310 m_aXRef
[nAnnotsId
].m_nGenerationNumber
= 0;
311 m_aXRef
[nAnnotsId
].m_bDirty
= true;
312 m_aEditBuffer
.WriteUInt32AsString(nAnnotsId
);
313 m_aEditBuffer
.WriteCharPtr(" 0 obj\n[");
315 // Write existing references.
316 PDFArrayElement
* pArray
= pAnnotsObject
->GetArray();
319 SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
323 for (size_t i
= 0; i
< pArray
->GetElements().size(); ++i
)
325 auto pReference
= dynamic_cast<PDFReferenceElement
*>(pArray
->GetElements()[i
]);
// Existing entries are re-emitted with generation number 0.
330 m_aEditBuffer
.WriteCharPtr(" ");
331 m_aEditBuffer
.WriteUInt32AsString(pReference
->GetObjectValue());
332 m_aEditBuffer
.WriteCharPtr(" 0 R");
334 // Write our reference.
335 m_aEditBuffer
.WriteCharPtr(" ");
336 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
337 m_aEditBuffer
.WriteCharPtr(" 0 R");
339 m_aEditBuffer
.WriteCharPtr("]\nendobj\n\n");
343 // Write the updated first page object, references nAnnotId.
344 sal_uInt32 nFirstPageId
= rFirstPage
.GetObjectValue();
345 if (nFirstPageId
>= m_aXRef
.size())
347 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
350 m_aXRef
[nFirstPageId
].m_nOffset
= m_aEditBuffer
.Tell();
351 m_aXRef
[nFirstPageId
].m_bDirty
= true;
352 m_aEditBuffer
.WriteUInt32AsString(nFirstPageId
);
353 m_aEditBuffer
.WriteCharPtr(" 0 obj\n");
354 m_aEditBuffer
.WriteCharPtr("<<");
355 auto pAnnotsArray
= dynamic_cast<PDFArrayElement
*>(pAnnots
);
358 // No Annots key, just write the key with a single reference.
// The existing dictionary bytes are copied from the edit buffer's own data.
359 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData()) + rFirstPage
.GetDictionaryOffset(), rFirstPage
.GetDictionaryLength());
360 m_aEditBuffer
.WriteCharPtr("/Annots[");
361 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
362 m_aEditBuffer
.WriteCharPtr(" 0 R]");
366 // Annots key is already there, insert our reference at the end.
367 PDFDictionaryElement
* pDictionary
= rFirstPage
.GetDictionary();
369 // Offset right before the end of the Annots array.
// The trailing "- 1" steps back over the last character of the value, which is
// presumably the closing ']' of the array.
370 sal_uInt64 nAnnotsEndOffset
= pDictionary
->GetKeyOffset("Annots") + pDictionary
->GetKeyValueLength("Annots") - 1;
371 // Length of beginning of the dictionary -> Annots end.
372 sal_uInt64 nAnnotsBeforeEndLength
= nAnnotsEndOffset
- rFirstPage
.GetDictionaryOffset();
373 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData()) + rFirstPage
.GetDictionaryOffset(), nAnnotsBeforeEndLength
);
374 m_aEditBuffer
.WriteCharPtr(" ");
375 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
376 m_aEditBuffer
.WriteCharPtr(" 0 R");
377 // Length of Annots end -> end of the dictionary.
378 sal_uInt64 nAnnotsAfterEndLength
= rFirstPage
.GetDictionaryOffset() + rFirstPage
.GetDictionaryLength() - nAnnotsEndOffset
;
379 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData()) + nAnnotsEndOffset
, nAnnotsAfterEndLength
);
381 m_aEditBuffer
.WriteCharPtr(">>");
382 m_aEditBuffer
.WriteCharPtr("\nendobj\n\n");
// Resolves the document catalog through the "Root" entry (looked up on m_pXRefStream or on
// m_pTrailer), stores the root reference in pRoot, and writes updated objects so that the
// AcroForm "Fields" array additionally references object nAnnotId.
// Three AcroForm shapes appear below: an indirect reference to an AcroForm object (that
// object is rewritten, from its object-stream buffer when it has one), no inline AcroForm
// dictionary (a new "/AcroForm<</Fields[...]/SigFlags 3>>" entry is appended to the
// catalog), and an inline AcroForm dictionary (the reference is spliced in before the end
// of its Fields array).
// Returns bool; Sign() treats false as failure. Each visible failure path logs a SAL_WARN.
// NOTE(review): several branch, brace and return lines are not visible in this listing, so
// the exact nesting of the sections below cannot be confirmed from here.
388 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId
, PDFReferenceElement
*& pRoot
)
391 pRoot
= dynamic_cast<PDFReferenceElement
*>(m_pXRefStream
->Lookup("Root"));
396 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
399 pRoot
= dynamic_cast<PDFReferenceElement
*>(m_pTrailer
->Lookup("Root"));
403 SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
406 PDFObjectElement
* pCatalog
= pRoot
->LookupObject();
409 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
412 sal_uInt32 nCatalogId
= pCatalog
->GetObjectValue();
413 if (nCatalogId
>= m_aXRef
.size())
415 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
418 PDFElement
* pAcroForm
= pCatalog
->Lookup("AcroForm");
419 auto pAcroFormReference
= dynamic_cast<PDFReferenceElement
*>(pAcroForm
);
420 if (pAcroFormReference
)
422 // Write the updated AcroForm key of the Catalog object.
423 PDFObjectElement
* pAcroFormObject
= pAcroFormReference
->LookupObject();
424 if (!pAcroFormObject
)
426 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
// The rewritten AcroForm object lives at the current write position as a plain
// (not compressed) object with generation 0.
430 sal_uInt32 nAcroFormId
= pAcroFormObject
->GetObjectValue();
431 m_aXRef
[nAcroFormId
].m_eType
= XRefEntryType::NOT_COMPRESSED
;
432 m_aXRef
[nAcroFormId
].m_nOffset
= m_aEditBuffer
.Tell();
433 m_aXRef
[nAcroFormId
].m_nGenerationNumber
= 0;
434 m_aXRef
[nAcroFormId
].m_bDirty
= true;
435 m_aEditBuffer
.WriteUInt32AsString(nAcroFormId
);
436 m_aEditBuffer
.WriteCharPtr(" 0 obj\n");
438 // If this is nullptr, then the AcroForm object is not in an object stream.
439 SvMemoryStream
* pStreamBuffer
= pAcroFormObject
->GetStreamBuffer();
441 if (!pAcroFormObject
->Lookup("Fields"))
443 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object without required Fields key");
447 PDFDictionaryElement
* pAcroFormDictionary
= pAcroFormObject
->GetDictionary();
448 if (!pAcroFormDictionary
)
450 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
454 // Offset right before the end of the Fields array.
455 sal_uInt64 nFieldsEndOffset
= pAcroFormDictionary
->GetKeyOffset("Fields") + pAcroFormDictionary
->GetKeyValueLength("Fields") - strlen("]");
456 // Length of beginning of the object dictionary -> Fields end.
457 sal_uInt64 nFieldsBeforeEndLength
= nFieldsEndOffset
;
// Object-stream case: the source bytes come from pStreamBuffer, starting at its beginning.
459 m_aEditBuffer
.WriteBytes(pStreamBuffer
->GetData(), nFieldsBeforeEndLength
);
// Other case: the source bytes come from the edit buffer, starting at the dictionary offset.
462 nFieldsBeforeEndLength
-= pAcroFormObject
->GetDictionaryOffset();
463 m_aEditBuffer
.WriteCharPtr("<<");
464 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData()) + pAcroFormObject
->GetDictionaryOffset(), nFieldsBeforeEndLength
);
467 // Append our reference at the end of the Fields array.
468 m_aEditBuffer
.WriteCharPtr(" ");
469 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
470 m_aEditBuffer
.WriteCharPtr(" 0 R");
472 // Length of Fields end -> end of the object dictionary.
475 sal_uInt64 nFieldsAfterEndLength
= pStreamBuffer
->GetSize() - nFieldsEndOffset
;
476 m_aEditBuffer
.WriteBytes(static_cast<const char*>(pStreamBuffer
->GetData()) + nFieldsEndOffset
, nFieldsAfterEndLength
);
480 sal_uInt64 nFieldsAfterEndLength
= pAcroFormObject
->GetDictionaryOffset() + pAcroFormObject
->GetDictionaryLength() - nFieldsEndOffset
;
481 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData()) + nFieldsEndOffset
, nFieldsAfterEndLength
);
482 m_aEditBuffer
.WriteCharPtr(">>");
485 m_aEditBuffer
.WriteCharPtr("\nendobj\n\n");
489 // Write the updated Catalog object, references nAnnotId.
490 auto pAcroFormDictionary
= dynamic_cast<PDFDictionaryElement
*>(pAcroForm
);
491 m_aXRef
[nCatalogId
].m_nOffset
= m_aEditBuffer
.Tell();
492 m_aXRef
[nCatalogId
].m_bDirty
= true;
493 m_aEditBuffer
.WriteUInt32AsString(nCatalogId
);
494 m_aEditBuffer
.WriteCharPtr(" 0 obj\n");
495 m_aEditBuffer
.WriteCharPtr("<<");
496 if (!pAcroFormDictionary
)
498 // No AcroForm key, assume no signatures.
499 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData()) + pCatalog
->GetDictionaryOffset(), pCatalog
->GetDictionaryLength());
500 m_aEditBuffer
.WriteCharPtr("/AcroForm<</Fields[\n");
501 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
502 m_aEditBuffer
.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
506 // AcroForm key is already there, insert our reference at the Fields end.
507 auto it
= pAcroFormDictionary
->GetItems().find("Fields");
508 if (it
== pAcroFormDictionary
->GetItems().end())
510 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
514 auto pFields
= dynamic_cast<PDFArrayElement
*>(it
->second
);
517 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
521 // Offset right before the end of the Fields array.
522 sal_uInt64 nFieldsEndOffset
= pAcroFormDictionary
->GetKeyOffset("Fields") + pAcroFormDictionary
->GetKeyValueLength("Fields") - 1;
523 // Length of beginning of the Catalog dictionary -> Fields end.
524 sal_uInt64 nFieldsBeforeEndLength
= nFieldsEndOffset
- pCatalog
->GetDictionaryOffset();
525 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData()) + pCatalog
->GetDictionaryOffset(), nFieldsBeforeEndLength
);
526 m_aEditBuffer
.WriteCharPtr(" ");
527 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
528 m_aEditBuffer
.WriteCharPtr(" 0 R");
529 // Length of Fields end -> end of the Catalog dictionary.
530 sal_uInt64 nFieldsAfterEndLength
= pCatalog
->GetDictionaryOffset() + pCatalog
->GetDictionaryLength() - nFieldsEndOffset
;
531 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData()) + nFieldsEndOffset
, nFieldsAfterEndLength
);
533 m_aEditBuffer
.WriteCharPtr(">>\nendobj\n\n");
// Writes the cross-reference information for the dirty entries of m_aXRef to the edit
// buffer. Two output forms appear below: a cross-reference stream object (which also
// registers itself in m_aXRef at nXRefOffset) and a classic "xref" table followed by a
// "trailer" dictionary. The stream form reads "ID" and "Info" from m_pXRefStream, the
// table form reads them from m_pTrailer. pRoot is dereferenced wherever a "/Root" entry
// is written.
// NOTE(review): the condition selecting between the two forms, the declarations of nPos
// and of the compression codec, the per-type assignments to nType and the null checks
// around pID / pInfo / pIDString are not visible in this listing.
539 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset
, PDFReferenceElement
* pRoot
)
543 // Write the xref stream.
544 // This is a bit meta: the xref stream stores its own offset.
545 sal_Int32 nXRefStreamId
= m_aXRef
.size();
546 XRefEntry aXRefStreamEntry
;
547 aXRefStreamEntry
.m_nOffset
= nXRefOffset
;
548 aXRefStreamEntry
.m_bDirty
= true;
549 m_aXRef
[nXRefStreamId
] = aXRefStreamEntry
;
551 // Write stream data.
552 SvMemoryStream aXRefStream
;
// Width in bytes of the offset field; matches the "3" in "/W[1 3 1]" written below.
553 const size_t nOffsetLen
= 3;
554 // 3 additional bytes: predictor, the first and the third field.
555 const size_t nLineLength
= nOffsetLen
+ 3;
556 // This is the line as it appears before tweaking according to the predictor.
557 std::vector
<unsigned char> aOrigLine(nLineLength
);
558 // This is the previous line.
559 std::vector
<unsigned char> aPrevLine(nLineLength
);
560 // This is the line as written to the stream.
561 std::vector
<unsigned char> aFilteredLine(nLineLength
);
562 for (const auto& rXRef
: m_aXRef
)
564 const XRefEntry
& rEntry
= rXRef
.second
;
566 if (!rEntry
.m_bDirty
)
571 // PNG prediction: up (on all rows).
572 aOrigLine
[nPos
++] = 2;
575 unsigned char nType
= 0;
576 switch (rEntry
.m_eType
)
578 case XRefEntryType::FREE
:
581 case XRefEntryType::NOT_COMPRESSED
:
584 case XRefEntryType::COMPRESSED
:
588 aOrigLine
[nPos
++] = nType
;
// NOTE(review): only the low nOffsetLen * 8 = 24 bits of m_nOffset are emitted here.
591 for (size_t i
= 0; i
< nOffsetLen
; ++i
)
593 size_t nByte
= nOffsetLen
- i
- 1;
594 // Fields requiring more than one byte are stored with the
595 // high-order byte first.
596 unsigned char nCh
= (rEntry
.m_nOffset
& (0xff << (nByte
* 8))) >> (nByte
* 8);
597 aOrigLine
[nPos
++] = nCh
;
// Third field is always written as 0.
601 aOrigLine
[nPos
++] = 0;
603 // Now apply the predictor.
// Byte 0 is the predictor tag and is copied through unchanged.
604 aFilteredLine
[0] = aOrigLine
[0];
605 for (size_t i
= 1; i
< nLineLength
; ++i
)
607 // Count the delta vs the previous line.
608 aFilteredLine
[i
] = aOrigLine
[i
] - aPrevLine
[i
];
609 // Remember the new reference.
610 aPrevLine
[i
] = aOrigLine
[i
];
613 aXRefStream
.WriteBytes(aFilteredLine
.data(), aFilteredLine
.size());
616 m_aEditBuffer
.WriteUInt32AsString(nXRefStreamId
);
617 m_aEditBuffer
.WriteCharPtr(" 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
// Carry over the /ID array of the existing xref stream dictionary as hex strings.
620 auto pID
= dynamic_cast<PDFArrayElement
*>(m_pXRefStream
->Lookup("ID"));
623 const std::vector
<PDFElement
*>& rElements
= pID
->GetElements();
624 m_aEditBuffer
.WriteCharPtr("/ID [ <");
625 for (size_t i
= 0; i
< rElements
.size(); ++i
)
627 auto pIDString
= dynamic_cast<PDFHexStringElement
*>(rElements
[i
]);
631 m_aEditBuffer
.WriteOString(pIDString
->GetValue());
632 if ((i
+ 1) < rElements
.size())
633 m_aEditBuffer
.WriteCharPtr("> <");
635 m_aEditBuffer
.WriteCharPtr("> ] ");
// /Index lists each dirty object as its own subsection of length 1.
639 m_aEditBuffer
.WriteCharPtr("/Index [ ");
640 for (const auto& rXRef
: m_aXRef
)
642 if (!rXRef
.second
.m_bDirty
)
645 m_aEditBuffer
.WriteUInt32AsString(rXRef
.first
);
646 m_aEditBuffer
.WriteCharPtr(" 1 ");
648 m_aEditBuffer
.WriteCharPtr("] ");
// Carry over the /Info reference of the existing xref stream dictionary.
651 auto pInfo
= dynamic_cast<PDFReferenceElement
*>(m_pXRefStream
->Lookup("Info"));
654 m_aEditBuffer
.WriteCharPtr("/Info ");
655 m_aEditBuffer
.WriteUInt32AsString(pInfo
->GetObjectValue());
656 m_aEditBuffer
.WriteCharPtr(" ");
657 m_aEditBuffer
.WriteUInt32AsString(pInfo
->GetGenerationValue());
658 m_aEditBuffer
.WriteCharPtr(" R ");
// /Length is the size of the compressed data, so compress before writing it.
662 m_aEditBuffer
.WriteCharPtr("/Length ");
665 aZCodec
.BeginCompression();
667 SvMemoryStream aStream
;
668 aZCodec
.Compress(aXRefStream
, aStream
);
669 aZCodec
.EndCompression();
// Replace the uncompressed rows in aXRefStream with the compressed bytes.
671 aXRefStream
.SetStreamSize(0);
673 aXRefStream
.WriteStream(aStream
);
675 m_aEditBuffer
.WriteUInt32AsString(aXRefStream
.GetSize());
677 if (!m_aStartXRefs
.empty())
679 // Write location of the previous cross-reference section.
680 m_aEditBuffer
.WriteCharPtr("/Prev ");
681 m_aEditBuffer
.WriteUInt32AsString(m_aStartXRefs
.back());
685 m_aEditBuffer
.WriteCharPtr("/Root ");
686 m_aEditBuffer
.WriteUInt32AsString(pRoot
->GetObjectValue());
687 m_aEditBuffer
.WriteCharPtr(" ");
688 m_aEditBuffer
.WriteUInt32AsString(pRoot
->GetGenerationValue());
689 m_aEditBuffer
.WriteCharPtr(" R ");
692 m_aEditBuffer
.WriteCharPtr("/Size ");
693 m_aEditBuffer
.WriteUInt32AsString(m_aXRef
.size());
695 m_aEditBuffer
.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
697 m_aEditBuffer
.WriteStream(aXRefStream
);
698 m_aEditBuffer
.WriteCharPtr("\nendstream\nendobj\n\n");
702 // Write the xref table.
703 m_aEditBuffer
.WriteCharPtr("xref\n");
704 for (const auto& rXRef
: m_aXRef
)
706 size_t nObject
= rXRef
.first
;
707 size_t nOffset
= rXRef
.second
.m_nOffset
;
708 if (!rXRef
.second
.m_bDirty
)
// Each dirty object is written as its own subsection of length 1.
711 m_aEditBuffer
.WriteUInt32AsString(nObject
);
712 m_aEditBuffer
.WriteCharPtr(" 1\n");
// The offset is left-padded with zeros to 10 digits.
// NOTE(review): the offset is narrowed to sal_Int32 before formatting.
713 OStringBuffer aBuffer
;
714 aBuffer
.append(static_cast<sal_Int32
>(nOffset
));
715 while (aBuffer
.getLength() < 10)
716 aBuffer
.insert(0, "0");
718 aBuffer
.append(" 65535 f \n");
720 aBuffer
.append(" 00000 n \n");
721 m_aEditBuffer
.WriteOString(aBuffer
.toString());
724 // Write the trailer.
725 m_aEditBuffer
.WriteCharPtr("trailer\n<</Size ");
726 m_aEditBuffer
.WriteUInt32AsString(m_aXRef
.size());
727 m_aEditBuffer
.WriteCharPtr("/Root ");
728 m_aEditBuffer
.WriteUInt32AsString(pRoot
->GetObjectValue());
729 m_aEditBuffer
.WriteCharPtr(" ");
730 m_aEditBuffer
.WriteUInt32AsString(pRoot
->GetGenerationValue());
731 m_aEditBuffer
.WriteCharPtr(" R\n");
// Carry over the /Info reference of the existing trailer.
732 auto pInfo
= dynamic_cast<PDFReferenceElement
*>(m_pTrailer
->Lookup("Info"));
735 m_aEditBuffer
.WriteCharPtr("/Info ");
736 m_aEditBuffer
.WriteUInt32AsString(pInfo
->GetObjectValue());
737 m_aEditBuffer
.WriteCharPtr(" ");
738 m_aEditBuffer
.WriteUInt32AsString(pInfo
->GetGenerationValue());
739 m_aEditBuffer
.WriteCharPtr(" R\n");
// Carry over the /ID array of the existing trailer as hex strings.
741 auto pID
= dynamic_cast<PDFArrayElement
*>(m_pTrailer
->Lookup("ID"));
744 const std::vector
<PDFElement
*>& rElements
= pID
->GetElements();
745 m_aEditBuffer
.WriteCharPtr("/ID [ <");
746 for (size_t i
= 0; i
< rElements
.size(); ++i
)
748 auto pIDString
= dynamic_cast<PDFHexStringElement
*>(rElements
[i
]);
752 m_aEditBuffer
.WriteOString(pIDString
->GetValue());
753 if ((i
+ 1) < rElements
.size())
754 m_aEditBuffer
.WriteCharPtr(">\n<");
756 m_aEditBuffer
.WriteCharPtr("> ]\n");
759 if (!m_aStartXRefs
.empty())
761 // Write location of the previous cross-reference section.
762 m_aEditBuffer
.WriteCharPtr("/Prev ");
763 m_aEditBuffer
.WriteUInt32AsString(m_aStartXRefs
.back());
766 m_aEditBuffer
.WriteCharPtr(">>\n");
770 bool PDFDocument::Sign(const uno::Reference
<security::XCertificate
>& xCertificate
, const OUString
& rDescription
, bool bAdES
)
772 m_aEditBuffer
.Seek(STREAM_SEEK_TO_END
);
773 m_aEditBuffer
.WriteCharPtr("\n");
775 sal_uInt64 nSignatureLastByteRangeOffset
= 0;
776 sal_Int64 nSignatureContentOffset
= 0;
777 sal_Int32 nSignatureId
= WriteSignatureObject(rDescription
, bAdES
, nSignatureLastByteRangeOffset
, nSignatureContentOffset
);
779 sal_Int32 nAppearanceId
= WriteAppearanceObject();
781 std::vector
<PDFObjectElement
*> aPages
= GetPages();
782 if (aPages
.empty() || !aPages
[0])
784 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
788 PDFObjectElement
& rFirstPage
= *aPages
[0];
789 sal_Int32 nAnnotId
= WriteAnnotObject(rFirstPage
, nSignatureId
, nAppearanceId
);
791 if (!WritePageObject(rFirstPage
, nAnnotId
))
793 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
797 PDFReferenceElement
* pRoot
= nullptr;
798 if (!WriteCatalogObject(nAnnotId
, pRoot
))
800 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
804 sal_uInt64 nXRefOffset
= m_aEditBuffer
.Tell();
805 WriteXRef(nXRefOffset
, pRoot
);
808 m_aEditBuffer
.WriteCharPtr("startxref\n");
809 m_aEditBuffer
.WriteUInt32AsString(nXRefOffset
);
810 m_aEditBuffer
.WriteCharPtr("\n%%EOF\n");
812 // Finalize the signature, now that we know the total file size.
813 // Calculate the length of the last byte range.
814 sal_uInt64 nFileEnd
= m_aEditBuffer
.Tell();
815 sal_Int64 nLastByteRangeLength
= nFileEnd
- (nSignatureContentOffset
+ MAX_SIGNATURE_CONTENT_LENGTH
+ 1);
816 // Write the length to the buffer.
817 m_aEditBuffer
.Seek(nSignatureLastByteRangeOffset
);
818 OStringBuffer aByteRangeBuffer
;
819 aByteRangeBuffer
.append(nLastByteRangeLength
);
820 aByteRangeBuffer
.append(" ]");
821 m_aEditBuffer
.WriteOString(aByteRangeBuffer
.toString());
823 // Create the PKCS#7 object.
824 css::uno::Sequence
<sal_Int8
> aDerEncoded
= xCertificate
->getEncoded();
825 if (!aDerEncoded
.hasElements())
827 SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
831 m_aEditBuffer
.Seek(0);
832 sal_uInt64 nBufferSize1
= nSignatureContentOffset
- 1;
833 std::unique_ptr
<char[]> aBuffer1(new char[nBufferSize1
]);
834 m_aEditBuffer
.ReadBytes(aBuffer1
.get(), nBufferSize1
);
836 m_aEditBuffer
.Seek(nSignatureContentOffset
+ MAX_SIGNATURE_CONTENT_LENGTH
+ 1);
837 sal_uInt64 nBufferSize2
= nLastByteRangeLength
;
838 std::unique_ptr
<char[]> aBuffer2(new char[nBufferSize2
]);
839 m_aEditBuffer
.ReadBytes(aBuffer2
.get(), nBufferSize2
);
841 OStringBuffer aCMSHexBuffer
;
842 vcl::PDFWriter::PDFSignContext
aSignContext(aCMSHexBuffer
);
843 aSignContext
.m_pDerEncoded
= aDerEncoded
.getArray();
844 aSignContext
.m_nDerEncoded
= aDerEncoded
.getLength();
845 aSignContext
.m_pByteRange1
= aBuffer1
.get();
846 aSignContext
.m_nByteRange1
= nBufferSize1
;
847 aSignContext
.m_pByteRange2
= aBuffer2
.get();
848 aSignContext
.m_nByteRange2
= nBufferSize2
;
849 if (!vcl::PDFWriter::Sign(aSignContext
))
851 SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
855 assert(aCMSHexBuffer
.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH
);
857 m_aEditBuffer
.Seek(nSignatureContentOffset
);
858 m_aEditBuffer
.WriteOString(aCMSHexBuffer
.toString());
863 bool PDFDocument::Write(SvStream
& rStream
)
865 m_aEditBuffer
.Seek(0);
866 rStream
.WriteStream(m_aEditBuffer
);
867 return rStream
.good();
870 bool PDFDocument::Tokenize(SvStream
& rStream
, TokenizeMode eMode
, std::vector
< std::unique_ptr
<PDFElement
> >& rElements
, PDFObjectElement
* pObjectElement
)
872 // Last seen object token.
873 PDFObjectElement
* pObject
= pObjectElement
;
874 PDFNameElement
* pObjectKey
= nullptr;
875 PDFObjectElement
* pObjectStream
= nullptr;
876 bool bInXRef
= false;
877 // The next number will be an xref offset.
878 bool bInStartXRef
= false;
879 // Dictionary depth, so we know when we're outside any dictionaries.
880 int nDictionaryDepth
= 0;
881 // Array depth, only the offset/length of the toplevel array is tracked.
883 // Last seen array token that's outside any dictionaries.
884 PDFArrayElement
* pArray
= nullptr;
885 // If we're inside an obj/endobj pair.
886 bool bInObject
= false;
890 rStream
.ReadChar(ch
);
898 auto pComment
= new PDFCommentElement(*this);
899 rElements
.push_back(std::unique_ptr
<PDFElement
>(pComment
));
901 if (!rElements
.back()->Read(rStream
))
903 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
906 if (eMode
== TokenizeMode::EOF_TOKEN
&& !m_aEOFs
.empty() && m_aEOFs
.back() == rStream
.Tell())
908 // Found EOF and partial parsing requested, we're done.
915 // Dictionary or hex string.
916 rStream
.ReadChar(ch
);
920 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFDictionaryElement()));
924 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFHexStringElement
));
925 if (!rElements
.back()->Read(rStream
))
927 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
934 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFEndDictionaryElement()));
937 if (!rElements
.back()->Read(rStream
))
939 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
946 auto pArr
= new PDFArrayElement(pObject
);
947 rElements
.push_back(std::unique_ptr
<PDFElement
>(pArr
));
948 if (nDictionaryDepth
== 0 && nArrayDepth
== 0)
950 // The array is attached directly, inform the object.
954 pObject
->SetArray(pArray
);
955 pObject
->SetArrayOffset(rStream
.Tell());
960 if (!rElements
.back()->Read(rStream
))
962 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
969 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFEndArrayElement()));
971 if (nArrayDepth
== 0)
974 if (nDictionaryDepth
== 0 && nArrayDepth
== 0)
978 pObject
->SetArrayLength(rStream
.Tell() - pObject
->GetArrayOffset());
981 if (!rElements
.back()->Read(rStream
))
983 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
990 auto pNameElement
= new PDFNameElement();
991 rElements
.push_back(std::unique_ptr
<PDFElement
>(pNameElement
));
993 if (!pNameElement
->Read(rStream
))
995 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
998 if (pObject
&& pObjectKey
&& pObjectKey
->GetValue() == "Type" && pNameElement
->GetValue() == "ObjStm")
999 pObjectStream
= pObject
;
1001 pObjectKey
= pNameElement
;
1006 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFLiteralStringElement
));
1007 rStream
.SeekRel(-1);
1008 if (!rElements
.back()->Read(rStream
))
1010 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1017 if (isdigit(ch
) || ch
== '-')
1019 // Numeric object: an integer or a real.
1020 auto pNumberElement
= new PDFNumberElement();
1021 rElements
.push_back(std::unique_ptr
<PDFElement
>(pNumberElement
));
1022 rStream
.SeekRel(-1);
1023 if (!pNumberElement
->Read(rStream
))
1025 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1030 bInStartXRef
= false;
1031 m_aStartXRefs
.push_back(pNumberElement
->GetValue());
1033 auto it
= m_aOffsetObjects
.find(pNumberElement
->GetValue());
1034 if (it
!= m_aOffsetObjects
.end())
1035 m_pXRefStream
= it
->second
;
1037 else if (bInObject
&& !nDictionaryDepth
&& !nArrayDepth
&& pObject
)
1038 // Number element inside an object, but outside a
1039 // dictionary / array: remember it.
1040 pObject
->SetNumberElement(pNumberElement
);
1042 else if (isalpha(ch
))
1044 // Possible keyword, like "obj".
1045 rStream
.SeekRel(-1);
1046 OString aKeyword
= ReadKeyword(rStream
);
1048 bool bObj
= aKeyword
== "obj";
1049 if (bObj
|| aKeyword
== "R")
1051 size_t nElements
= rElements
.size();
1054 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two tokens before 'obj' or 'R' keyword");
1058 auto pObjectNumber
= dynamic_cast<PDFNumberElement
*>(rElements
[nElements
- 2].get());
1059 auto pGenerationNumber
= dynamic_cast<PDFNumberElement
*>(rElements
[nElements
- 1].get());
1060 if (!pObjectNumber
|| !pGenerationNumber
)
1062 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or generation number before 'obj' or 'R' keyword");
1068 pObject
= new PDFObjectElement(*this, pObjectNumber
->GetValue(), pGenerationNumber
->GetValue());
1069 rElements
.push_back(std::unique_ptr
<PDFElement
>(pObject
));
1070 m_aOffsetObjects
[pObjectNumber
->GetLocation()] = pObject
;
1071 m_aIDObjects
[pObjectNumber
->GetValue()] = pObject
;
1076 auto pReference
= new PDFReferenceElement(*this, *pObjectNumber
, *pGenerationNumber
);
1077 rElements
.push_back(std::unique_ptr
<PDFElement
>(pReference
));
1079 // Reference is part of a direct (non-dictionary) array, inform the array.
1080 pArray
->PushBack(rElements
.back().get());
1081 if (bInObject
&& nDictionaryDepth
> 0 && pObject
)
1082 // Inform the object about a new in-dictionary reference.
1083 pObject
->AddDictionaryReference(pReference
);
1085 if (!rElements
.back()->Read(rStream
))
1087 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFElement::Read() failed");
1091 else if (aKeyword
== "stream")
1093 // Look up the length of the stream from the parent object's dictionary.
1095 for (size_t nElement
= 0; nElement
< rElements
.size(); ++nElement
)
1097 // Iterate in reverse order.
1098 size_t nIndex
= rElements
.size() - nElement
- 1;
1099 PDFElement
* pElement
= rElements
[nIndex
].get();
1100 auto pObj
= dynamic_cast<PDFObjectElement
*>(pElement
);
1104 PDFElement
* pLookup
= pObj
->Lookup("Length");
1105 auto pReference
= dynamic_cast<PDFReferenceElement
*>(pLookup
);
1108 // Length is provided as a reference.
1109 nLength
= pReference
->LookupNumber(rStream
);
1113 auto pNumber
= dynamic_cast<PDFNumberElement
*>(pLookup
);
1116 // Length is provided directly.
1117 nLength
= pNumber
->GetValue();
1121 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: found no Length key for stream keyword");
1125 PDFDocument::SkipLineBreaks(rStream
);
1126 auto pStreamElement
= new PDFStreamElement(nLength
);
1128 pObject
->SetStream(pStreamElement
);
1129 rElements
.push_back(std::unique_ptr
<PDFElement
>(pStreamElement
));
1130 if (!rElements
.back()->Read(rStream
))
1132 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1136 else if (aKeyword
== "endstream")
1138 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFEndStreamElement
));
1139 if (!rElements
.back()->Read(rStream
))
1141 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1145 else if (aKeyword
== "endobj")
1147 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFEndObjectElement
));
1148 if (!rElements
.back()->Read(rStream
))
1150 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1153 if (eMode
== TokenizeMode::END_OF_OBJECT
)
1155 // Found endobj and only object parsing was requested, we're done.
1161 // We're at the end of an object stream, parse the stored objects.
1162 pObjectStream
->ParseStoredObjects();
1163 pObjectStream
= nullptr;
1164 pObjectKey
= nullptr;
1168 else if (aKeyword
== "true" || aKeyword
== "false")
1169 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFBooleanElement(aKeyword
.toBoolean())));
1170 else if (aKeyword
== "null")
1171 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFNullElement
));
1172 else if (aKeyword
== "xref")
1173 // Allow 'f' and 'n' keywords.
1175 else if (bInXRef
&& (aKeyword
== "f" || aKeyword
== "n"))
1178 else if (aKeyword
== "trailer")
1180 auto pTrailer
= new PDFTrailerElement(*this);
1182 // Make it possible to find this trailer later by offset.
1183 pTrailer
->Read(rStream
);
1184 m_aOffsetTrailers
[pTrailer
->GetLocation()] = pTrailer
;
1186 // When reading till the first EOF token only, remember
1187 // just the first trailer token.
1188 if (eMode
!= TokenizeMode::EOF_TOKEN
|| !m_pTrailer
)
1189 m_pTrailer
= pTrailer
;
1190 rElements
.push_back(std::unique_ptr
<PDFElement
>(pTrailer
));
1192 else if (aKeyword
== "startxref")
1194 bInStartXRef
= true;
1198 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '" << aKeyword
<< "' keyword at byte position " << rStream
.Tell());
1206 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected character: " << ch
<< " at byte position " << rStream
.Tell());
1218 void PDFDocument::SetIDObject(size_t nID
, PDFObjectElement
* pObject
)
1220 m_aIDObjects
[nID
] = pObject
;
/// Reads a PDF document from rStream.
///
/// Steps that are visible below: the first five bytes are compared against
/// the "%PDF-" magic, the stream is copied into m_aEditBuffer, the xref start
/// offset is obtained from FindStartXRef() (0 is reported as "found no xref
/// start offset"), and the keyword found at that offset decides how the
/// cross-reference data is read: an empty keyword leads to ReadXRefStream(),
/// anything other than "xref" is reported as an error. The next offset is
/// taken from the "Prev" key of the trailer or of the xref stream. The result
/// of the final Tokenize() call in END_OF_STREAM mode is returned.
1223 bool PDFDocument::Read(SvStream
& rStream
)
1225 // Check file magic.
1226 std::vector
<sal_Int8
> aHeader(5);
1228 rStream
.ReadBytes(aHeader
.data(), aHeader
.size());
1229 if (aHeader
[0] != '%' || aHeader
[1] != 'P' || aHeader
[2] != 'D' || aHeader
[3] != 'F' || aHeader
[4] != '-')
1231 SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1235 // Allow later editing of the contents in-memory.
1237 m_aEditBuffer
.WriteStream(rStream
);
1239 // Look up the offset of the xref table.
1240 size_t nStartXRef
= FindStartXRef(rStream
);
1241 SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef
);
1242 if (nStartXRef
== 0)
1244 SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1249 rStream
.Seek(nStartXRef
);
1250 OString aKeyword
= ReadKeyword(rStream
);
// No keyword at the xref offset: the cross-reference data is an xref stream.
1251 if (aKeyword
.isEmpty())
1252 ReadXRefStream(rStream
);
1256 if (aKeyword
!= "xref")
1258 SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
// Tokenize up to the first EOF token so that the trailer becomes available.
1262 if (!Tokenize(rStream
, TokenizeMode::EOF_TOKEN
, m_aElements
, nullptr))
1264 SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
// The "Prev" key (from the trailer or the xref stream) names the next,
// older cross-reference section to read.
1269 PDFNumberElement
* pPrev
= nullptr;
1272 pPrev
= dynamic_cast<PDFNumberElement
*>(m_pTrailer
->Lookup("Prev"));
1274 // Remember the offset of this trailer in the correct order. It's
1275 // possible that newer trailers don't have a larger offset.
1276 m_aTrailerOffsets
.push_back(m_pTrailer
->GetLocation());
1278 else if (m_pXRefStream
)
1279 pPrev
= dynamic_cast<PDFNumberElement
*>(m_pXRefStream
->Lookup("Prev"));
1281 nStartXRef
= pPrev
->GetValue();
1283 // Reset state, except the edit buffer.
1284 m_aElements
.clear();
1285 m_aOffsetObjects
.clear();
1286 m_aIDObjects
.clear();
1287 m_aStartXRefs
.clear();
1289 m_pTrailer
= nullptr;
1290 m_pXRefStream
= nullptr;
1295 // Then we can tokenize the stream.
1297 return Tokenize(rStream
, TokenizeMode::END_OF_STREAM
, m_aElements
, nullptr);
/// Reads a keyword from the current position of rStream and returns it as an
/// OString built from aBuf.
///
/// Hitting the end of the stream ends the read early. On the regular path
/// the stream is moved back by one byte (SeekRel(-1)) before returning, so
/// the character that terminated the keyword is left unread for the caller.
/// Callers in this file treat an empty result as "no keyword here".
/// NOTE(review): presumably characters are collected while they are
/// alphabetic -- confirm against the loop condition.
1300 OString
PDFDocument::ReadKeyword(SvStream
& rStream
)
1304 rStream
.ReadChar(ch
);
1305 if (rStream
.IsEof())
1310 rStream
.ReadChar(ch
);
1311 if (rStream
.IsEof())
1312 return aBuf
.toString();
1314 rStream
.SeekRel(-1);
1315 return aBuf
.toString();
/// Locates the last "startxref" token near the end of rStream and returns the
/// number that follows it.
///
/// At most aBuf.size() (1024) bytes before the end of the stream are read
/// into a buffer, which is then searched with std::search for the
/// "startxref" prefix, remembering the last match in itLastValid. The stream
/// is positioned just after that match, whitespace is skipped, and the offset
/// is parsed with PDFNumberElement::Read().
/// PDFDocument::Read() treats a return value of 0 as "no xref offset found".
1318 size_t PDFDocument::FindStartXRef(SvStream
& rStream
)
1320 // Find the "startxref" token, somewhere near the end of the document.
1321 std::vector
<char> aBuf(1024);
1322 rStream
.Seek(STREAM_SEEK_TO_END
);
1323 if (rStream
.Tell() > aBuf
.size())
1324 rStream
.SeekRel(static_cast<sal_Int64
>(-1) * aBuf
.size());
1326 // The document is really short, then just read it from the start.
// Peek: read the tail into aBuf, then restore the stream position.
1328 size_t nBeforePeek
= rStream
.Tell();
1329 size_t nSize
= rStream
.ReadBytes(aBuf
.data(), aBuf
.size());
1330 rStream
.Seek(nBeforePeek
);
1331 if (nSize
!= aBuf
.size())
1333 OString
aPrefix("startxref");
1334 // Find the last startxref at the end of the document.
1335 auto itLastValid
= aBuf
.end();
1336 auto it
= aBuf
.begin();
1339 it
= std::search(it
, aBuf
.end(), aPrefix
.getStr(), aPrefix
.getStr() + aPrefix
.getLength());
1340 if (it
== aBuf
.end())
1348 if (itLastValid
== aBuf
.end())
1350 SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
// Jump to the first byte after the last "startxref" token.
1354 rStream
.SeekRel(itLastValid
- aBuf
.begin() + aPrefix
.getLength());
1355 if (rStream
.IsEof())
1357 SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1361 PDFDocument::SkipWhitespace(rStream
);
1362 PDFNumberElement aNumber
;
1363 if (!aNumber
.Read(rStream
))
1365 return aNumber
.GetValue();
/// Reads a cross-reference stream object from rStream and stores its entries
/// in m_aXRef.
///
/// The object is tokenized up to its end (TokenizeMode::END_OF_OBJECT) and
/// remembered in m_pXRefStream so that its "Prev" key can be looked up later.
/// The stream bytes are read according to the "Length" key and decompressed
/// through aZCodec; a "Filter" other than "FlateDecode" is reported as
/// unexpected. "DecodeParms" may supply "Columns" and "Predictor"; the
/// subsections come from "Index" or, without it, from "Size" (one subsection
/// starting at object 0). "W" must have at least three numeric elements,
/// which give the byte widths of the type, offset and generation fields of
/// each line. An object index is only stored if it is not yet in m_aXRef.
///
/// NOTE(review): aBuf is sized directly from the "Length" value read from the
/// file; no bound check against the remaining stream size is visible here --
/// confirm whether a hostile length is rejected elsewhere.
1368 void PDFDocument::ReadXRefStream(SvStream
& rStream
)
1370 // Look up the stream length in the object dictionary.
1371 if (!Tokenize(rStream
, TokenizeMode::END_OF_OBJECT
, m_aElements
, nullptr))
1373 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1377 if (m_aElements
.empty())
1379 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
// Find the object token among the freshly tokenized elements.
1383 PDFObjectElement
* pObject
= nullptr;
1384 for (const auto& pElement
: m_aElements
)
1386 if (auto pObj
= dynamic_cast<PDFObjectElement
*>(pElement
.get()))
1394 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1398 // So that the Prev key can be looked up later.
1399 m_pXRefStream
= pObject
;
1401 PDFElement
* pLookup
= pObject
->Lookup("Length");
1402 auto pNumber
= dynamic_cast<PDFNumberElement
*>(pLookup
);
1405 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1408 sal_uInt64 nLength
= pNumber
->GetValue();
1410 // Look up the stream offset.
1411 PDFStreamElement
* pStream
= nullptr;
1412 for (const auto& pElement
: m_aElements
)
1414 if (auto pS
= dynamic_cast<PDFStreamElement
*>(pElement
.get()))
1422 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1426 // Read and decompress it.
1427 rStream
.Seek(pStream
->GetOffset());
1428 std::vector
<char> aBuf(nLength
);
1429 rStream
.ReadBytes(aBuf
.data(), aBuf
.size());
1431 auto pFilter
= dynamic_cast<PDFNameElement
*>(pObject
->Lookup("Filter"));
1434 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1438 if (pFilter
->GetValue() != "FlateDecode")
1440 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter
->GetValue());
// Optional decode parameters: column count and predictor of the data.
1446 if (auto pDecodeParams
= dynamic_cast<PDFDictionaryElement
*>(pObject
->Lookup("DecodeParms")))
1448 const std::map
<OString
, PDFElement
*>& rItems
= pDecodeParams
->GetItems();
1449 auto it
= rItems
.find("Columns");
1450 if (it
!= rItems
.end())
1451 if (auto pColumns
= dynamic_cast<PDFNumberElement
*>(it
->second
))
1452 nColumns
= pColumns
->GetValue();
1453 it
= rItems
.find("Predictor");
1454 if (it
!= rItems
.end())
1455 if (auto pPredictor
= dynamic_cast<PDFNumberElement
*>(it
->second
))
1456 nPredictor
= pPredictor
->GetValue();
// Decompress the raw stream bytes (aSource) into aStream.
1459 SvMemoryStream
aSource(aBuf
.data(), aBuf
.size(), StreamMode::READ
);
1460 SvMemoryStream aStream
;
1462 aZCodec
.BeginCompression();
1463 aZCodec
.Decompress(aSource
, aStream
);
1464 if (!aZCodec
.EndCompression())
1466 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1470 // Look up the first and the last entry we need to read.
1471 auto pIndex
= dynamic_cast<PDFArrayElement
*>(pObject
->Lookup("Index"));
1472 std::vector
<size_t> aFirstObjects
;
1473 std::vector
<size_t> aNumberOfObjects
;
1476 auto pSize
= dynamic_cast<PDFNumberElement
*>(pObject
->Lookup("Size"));
1479 aFirstObjects
.push_back(0);
1480 aNumberOfObjects
.push_back(pSize
->GetValue());
1484 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
// "Index" holds pairs: first object number, then number of objects.
1490 const std::vector
<PDFElement
*>& rIndexElements
= pIndex
->GetElements();
1491 size_t nFirstObject
= 0;
1492 for (size_t i
= 0; i
< rIndexElements
.size(); ++i
)
1496 auto pFirstObject
= dynamic_cast<PDFNumberElement
*>(rIndexElements
[i
]);
1499 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index has no first object");
1502 nFirstObject
= pFirstObject
->GetValue();
1506 auto pNumberOfObjects
= dynamic_cast<PDFNumberElement
*>(rIndexElements
[i
]);
1507 if (!pNumberOfObjects
)
1509 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index has no number of objects");
1512 aFirstObjects
.push_back(nFirstObject
);
1513 aNumberOfObjects
.push_back(pNumberOfObjects
->GetValue());
1517 // Look up the format of a single entry.
1518 const int nWSize
= 3;
1519 auto pW
= dynamic_cast<PDFArrayElement
*>(pObject
->Lookup("W"));
1520 if (!pW
|| pW
->GetElements().size() < nWSize
)
1522 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1526 // First character is the (kind of) repeated predictor.
1527 int nLineLength
= 1;
1528 for (size_t i
= 0; i
< nWSize
; ++i
)
1530 auto pI
= dynamic_cast<PDFNumberElement
*>(pW
->GetElements()[i
]);
1533 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1536 aW
[i
] = pI
->GetValue();
1537 nLineLength
+= aW
[i
];
1540 if (nPredictor
> 1 && nLineLength
- 1 != nColumns
)
1542 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
// Walk every subsection and decode one fixed-length line per object.
1547 for (size_t nSubSection
= 0; nSubSection
< aFirstObjects
.size(); ++nSubSection
)
1549 size_t nFirstObject
= aFirstObjects
[nSubSection
];
1550 size_t nNumberOfObjects
= aNumberOfObjects
[nSubSection
];
1552 // This is the line as read from the stream.
1553 std::vector
<unsigned char> aOrigLine(nLineLength
);
1554 // This is the line as it appears after tweaking according to nPredictor.
1555 std::vector
<unsigned char> aFilteredLine(nLineLength
);
1556 for (size_t nEntry
= 0; nEntry
< nNumberOfObjects
; ++nEntry
)
1558 size_t nIndex
= nFirstObject
+ nEntry
;
1560 aStream
.ReadBytes(aOrigLine
.data(), aOrigLine
.size());
1561 if (nPredictor
> 1 && aOrigLine
[0] + 10 != nPredictor
)
1563 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is inconsistent with /DecodeParms/Predictor for object #" << nIndex
);
1567 for (int i
= 0; i
< nLineLength
; ++i
)
1575 // PNG prediction: up (on all rows).
1576 aFilteredLine
[i
] = aFilteredLine
[i
] + aOrigLine
[i
];
1579 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: " << nPredictor
);
1585 // First character is already handled above.
1588 // Start of the current field in the stream data.
// Each field is a big-endian integer of aW[n] bytes.
1590 for (; nPos
< nOffset
+ aW
[0]; ++nPos
)
1592 unsigned char nCh
= aFilteredLine
[nPos
];
1593 nType
= (nType
<< 8) + nCh
;
1596 // Start of the object in the file stream.
1597 size_t nStreamOffset
= 0;
1599 for (; nPos
< nOffset
+ aW
[1]; ++nPos
)
1601 unsigned char nCh
= aFilteredLine
[nPos
];
1602 nStreamOffset
= (nStreamOffset
<< 8) + nCh
;
1605 // Generation number of the object.
1606 size_t nGenerationNumber
= 0;
1608 for (; nPos
< nOffset
+ aW
[2]; ++nPos
)
1610 unsigned char nCh
= aFilteredLine
[nPos
];
1611 nGenerationNumber
= (nGenerationNumber
<< 8) + nCh
;
1614 // Ignore invalid nType.
1617 if (m_aXRef
.find(nIndex
) == m_aXRef
.end())
1623 aEntry
.m_eType
= XRefEntryType::FREE
;
1626 aEntry
.m_eType
= XRefEntryType::NOT_COMPRESSED
;
1629 aEntry
.m_eType
= XRefEntryType::COMPRESSED
;
1632 aEntry
.m_nOffset
= nStreamOffset
;
1633 aEntry
.m_nGenerationNumber
= nGenerationNumber
;
1634 m_aXRef
[nIndex
] = aEntry
;
/// Reads a classic (non-stream) cross-reference table from rStream into
/// m_aXRef.
///
/// A subsection starts with a first object number and a number of entries;
/// negative values for either are reported as errors. Every entry consists
/// of an offset, a generation number and an 'f' or 'n' keyword. When the
/// first number of a subsection cannot be read, the next token is taken to be
/// the trailer. An index that is already present in m_aXRef is left alone,
/// because xrefs are read in reverse order. Stored entries are marked dirty.
1641 void PDFDocument::ReadXRef(SvStream
& rStream
)
1643 PDFDocument::SkipWhitespace(rStream
);
1647 PDFNumberElement aFirstObject
;
1648 if (!aFirstObject
.Read(rStream
))
1650 // Next token is not a number, it'll be the trailer.
1654 if (aFirstObject
.GetValue() < 0)
1656 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1660 PDFDocument::SkipWhitespace(rStream
);
1661 PDFNumberElement aNumberOfEntries
;
1662 if (!aNumberOfEntries
.Read(rStream
))
1664 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1668 if (aNumberOfEntries
.GetValue() < 0)
1670 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
// Read the entries of this subsection one by one.
1674 size_t nSize
= aNumberOfEntries
.GetValue();
1675 for (size_t nEntry
= 0; nEntry
< nSize
; ++nEntry
)
1677 size_t nIndex
= aFirstObject
.GetValue() + nEntry
;
1678 PDFDocument::SkipWhitespace(rStream
);
1679 PDFNumberElement aOffset
;
1680 if (!aOffset
.Read(rStream
))
1682 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1686 PDFDocument::SkipWhitespace(rStream
);
1687 PDFNumberElement aGenerationNumber
;
1688 if (!aGenerationNumber
.Read(rStream
))
1690 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1694 PDFDocument::SkipWhitespace(rStream
);
1695 OString aKeyword
= ReadKeyword(rStream
);
1696 if (aKeyword
!= "f" && aKeyword
!= "n")
1698 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1701 // xrefs are read in reverse order, so never update an existing
1702 // offset with an older one.
1703 if (m_aXRef
.find(nIndex
) == m_aXRef
.end())
1706 aEntry
.m_nOffset
= aOffset
.GetValue();
1707 aEntry
.m_nGenerationNumber
= aGenerationNumber
.GetValue();
1708 // Initially only the first entry is dirty.
1710 aEntry
.m_bDirty
= true;
1711 m_aXRef
[nIndex
] = aEntry
;
1713 PDFDocument::SkipWhitespace(rStream
);
/// Advances rStream one character at a time, stopping at the end of the
/// stream; SeekRel(-1) moves the stream back by one byte so that the
/// character which ended the run is not consumed.
/// NOTE(review): presumably the skipped characters are PDF whitespace, as the
/// name says -- confirm against the character test.
1718 void PDFDocument::SkipWhitespace(SvStream
& rStream
)
1724 rStream
.ReadChar(ch
);
1725 if (rStream
.IsEof())
1730 rStream
.SeekRel(-1);
/// Advances rStream past line break characters.
///
/// Characters are read one at a time until the end of the stream or until a
/// character that is neither '\n' nor '\r' is found; that character is put
/// back with SeekRel(-1).
1736 void PDFDocument::SkipLineBreaks(SvStream
& rStream
)
1742 rStream
.ReadChar(ch
);
1743 if (rStream
.IsEof())
1746 if (ch
!= '\n' && ch
!= '\r')
1748 rStream
.SeekRel(-1);
/// Returns the offset stored in m_aXRef for the object with index nIndex.
///
/// An index that is missing from m_aXRef, or whose entry is of type
/// XRefEntryType::COMPRESSED, is reported as a failed lookup.
1754 size_t PDFDocument::GetObjectOffset(size_t nIndex
) const
1756 auto it
= m_aXRef
.find(nIndex
);
1757 if (it
== m_aXRef
.end() || it
->second
.m_eType
== XRefEntryType::COMPRESSED
)
1759 SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #" << nIndex
<< ", but failed");
1763 return it
->second
.m_nOffset
;
/// Gives read-only access to the document's list of owned PDF elements.
/// NOTE(review): presumably this returns m_aElements -- confirm.
1766 const std::vector
< std::unique_ptr
<PDFElement
> >& PDFDocument::GetElements()
1771 /// Visits the page tree recursively, looking for page objects.
///
/// pPages is the node whose "Kids" array is walked; rRet collects the page
/// objects that were found. Kids whose "Type" is "Pages" are visited
/// recursively, all other kid objects are appended to rRet. pPages is marked
/// as being visited for the duration of the walk, and a kid that is already
/// being visited is reported as a loop in the hierarchy.
1772 static void visitPages(PDFObjectElement
* pPages
, std::vector
<PDFObjectElement
*>& rRet
)
1774 auto pKids
= dynamic_cast<PDFArrayElement
*>(pPages
->Lookup("Kids"));
1777 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1781 pPages
->setVisiting(true);
1783 for (const auto& pKid
: pKids
->GetElements())
1785 auto pReference
= dynamic_cast<PDFReferenceElement
*>(pKid
);
1789 PDFObjectElement
* pKidObject
= pReference
->LookupObject();
1793 // detect if visiting reenters itself
1794 if (pKidObject
->alreadyVisiting())
1796 SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1800 auto pName
= dynamic_cast<PDFNameElement
*>(pKidObject
->Lookup("Type"));
1801 if (pName
&& pName
->GetValue() == "Pages")
1802 // Pages inside pages: recurse.
1803 visitPages(pKidObject
, rRet
);
1805 // Found an actual page.
1806 rRet
.push_back(pKidObject
);
1809 pPages
->setVisiting(false);
/// Collects the page objects of the document.
///
/// The "Root" reference is taken from the trailer found at the first offset
/// in m_aTrailerOffsets, or else from m_pXRefStream. The referenced catalog
/// object's "Pages" object is then handed to visitPages(), which fills aRet.
/// A missing Root key, catalog or Pages object is logged as a warning.
1812 std::vector
<PDFObjectElement
*> PDFDocument::GetPages()
1814 std::vector
<PDFObjectElement
*> aRet
;
1816 PDFReferenceElement
* pRoot
= nullptr;
1819 PDFTrailerElement
* pTrailer
= nullptr;
1820 if (!m_aTrailerOffsets
.empty())
1822 // Get access to the latest trailer, and work with the keys of that
1824 auto it
= m_aOffsetTrailers
.find(m_aTrailerOffsets
[0]);
1825 if (it
!= m_aOffsetTrailers
.end())
1826 pTrailer
= it
->second
;
1830 pRoot
= dynamic_cast<PDFReferenceElement
*>(pTrailer
->Lookup("Root"));
1831 else if (m_pXRefStream
)
1832 pRoot
= dynamic_cast<PDFReferenceElement
*>(m_pXRefStream
->Lookup("Root"));
1836 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no Root key");
1840 PDFObjectElement
* pCatalog
= pRoot
->LookupObject();
1843 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1847 PDFObjectElement
* pPages
= pCatalog
->LookupObject("Pages");
1850 SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog
->GetObjectValue() << ") has no pages");
1854 visitPages(pPages
, aRet
);
1859 void PDFDocument::PushBackEOF(size_t nOffset
)
1861 m_aEOFs
.push_back(nOffset
);
/// Collects the annotation objects of all pages whose "FT" name is "Sig".
///
/// For every page returned by GetPages() the "Annots" entry is looked up; it
/// may be a direct array or a reference to an object that holds an array.
/// Each array element that is a reference is resolved to its object, and the
/// object is appended to aRet when its "FT" name element has the value "Sig".
1864 std::vector
<PDFObjectElement
*> PDFDocument::GetSignatureWidgets()
1866 std::vector
<PDFObjectElement
*> aRet
;
1868 std::vector
<PDFObjectElement
*> aPages
= GetPages();
1870 for (const auto& pPage
: aPages
)
1875 PDFElement
* pAnnotsElement
= pPage
->Lookup("Annots");
1876 auto pAnnots
= dynamic_cast<PDFArrayElement
*>(pAnnotsElement
);
1879 // Annots is not an array, see if it's a reference to an object
1880 // with a direct array.
1881 auto pAnnotsRef
= dynamic_cast<PDFReferenceElement
*>(pAnnotsElement
);
1884 if (PDFObjectElement
* pAnnotsObject
= pAnnotsRef
->LookupObject())
1886 pAnnots
= pAnnotsObject
->GetArray();
1894 for (const auto& pAnnot
: pAnnots
->GetElements())
1896 auto pReference
= dynamic_cast<PDFReferenceElement
*>(pAnnot
);
1900 PDFObjectElement
* pAnnotObject
= pReference
->LookupObject();
// Only annotations with the field type name "Sig" are kept.
1904 auto pFT
= dynamic_cast<PDFNameElement
*>(pAnnotObject
->Lookup("FT"));
1905 if (!pFT
|| pFT
->GetValue() != "Sig")
1908 aRet
.push_back(pAnnotObject
);
/// Converts a single hexadecimal digit character to an int.
///
/// Lowercase 'a'..'f' and uppercase 'A'..'F' are handled by separate
/// branches.
/// NOTE(review): presumably decimal digits are handled too and invalid input
/// yields a negative value, since DecodeHexString() reports "invalid hex
/// value" after calling this -- confirm.
1915 int PDFDocument::AsHex(char ch
)
1922 if (ch
>= 'a' && ch
<= 'f')
1924 else if (ch
>= 'A' && ch
<= 'F')
/// Decodes the hex digits of pElement's value into bytes.
///
/// Every character of the string is passed through AsHex(); a character that
/// cannot be parsed is reported as "invalid hex value". Decoded bytes are
/// appended to aRet.
1933 std::vector
<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement
* pElement
)
1935 std::vector
<unsigned char> aRet
;
1936 const OString
& rHex
= pElement
->GetValue();
1937 size_t nHexLen
= rHex
.getLength();
1941 for (size_t i
= 0; i
< nHexLen
; ++i
)
1944 sal_Int8 nParsed
= AsHex(rHex
[i
]);
1947 SAL_WARN("vcl.filter", "PDFDocument::DecodeHexString: invalid hex value");
1954 aRet
.push_back(nByte
);
/// Creates a comment element that belongs to rDoc.
/// NOTE(review): presumably rDoc is kept as m_rDoc, which Read() uses to
/// report "%%EOF" positions -- confirm against the initializer list.
1964 PDFCommentElement::PDFCommentElement(PDFDocument
& rDoc
)
/// Reads a one-line comment from rStream into m_aComment.
///
/// Reading stops at '\n', '\r' or the end of the stream. When the comment
/// starts with "%%EOF", the current stream position is reported to the
/// document via PushBackEOF().
1969 bool PDFCommentElement::Read(SvStream
& rStream
)
1971 // Read from (including) the % char till (excluding) the end of the line/stream.
1974 rStream
.ReadChar(ch
);
1977 if (ch
== '\n' || ch
== '\r' || rStream
.IsEof())
1979 m_aComment
= aBuf
.makeStringAndClear();
1981 if (m_aComment
.startsWith("%%EOF"))
1982 m_rDoc
.PushBackEOF(rStream
.Tell());
1984 SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment
<< "'");
1988 rStream
.ReadChar(ch
);
1994 PDFNumberElement::PDFNumberElement() = default;
/// Reads an integer or real number from rStream.
///
/// The starting stream position is recorded in m_nOffset. Characters are
/// accepted while they are digits, '-' or '.'; the first other character is
/// put back with SeekRel(-1). m_nLength is the number of bytes consumed and
/// m_fValue is the buffered text converted with toDouble().
1996 bool PDFNumberElement::Read(SvStream
& rStream
)
1999 m_nOffset
= rStream
.Tell();
2001 rStream
.ReadChar(ch
);
2002 if (rStream
.IsEof())
// The very first character must already look like part of a number.
2006 if (!isdigit(ch
) && ch
!= '-' && ch
!= '.')
2008 rStream
.SeekRel(-1);
2011 while (!rStream
.IsEof())
2013 if (!isdigit(ch
) && ch
!= '-' && ch
!= '.')
2015 rStream
.SeekRel(-1);
2016 m_nLength
= rStream
.Tell() - m_nOffset
;
2017 m_fValue
= aBuf
.makeStringAndClear().toDouble();
2018 SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue
<< "'");
2022 rStream
.ReadChar(ch
);
/// Accessor for the location of this number element.
/// NOTE(review): presumably the m_nOffset recorded by Read() -- confirm.
2028 sal_uInt64
PDFNumberElement::GetLocation() const
/// Accessor for the length of this number element.
/// NOTE(review): presumably the m_nLength computed by Read() -- confirm.
2033 sal_uInt64
PDFNumberElement::GetLength() const
/// Constructs a boolean element; the bool argument is unnamed and therefore
/// not used by the constructor.
2038 PDFBooleanElement::PDFBooleanElement(bool /*bValue*/)
/// PDFElement::Read() override for boolean elements; the stream argument is
/// unnamed, so nothing is read from it.
2042 bool PDFBooleanElement::Read(SvStream
& /*rStream*/)
/// PDFElement::Read() override for null elements; the stream argument is
/// unnamed, so nothing is read from it.
2047 bool PDFNullElement::Read(SvStream
& /*rStream*/)
/// Reads a hex string from rStream into m_aValue.
///
/// The first character is expected to be '<'. Characters are then read
/// until the end of the stream; when the string is complete the buffered
/// text is moved into m_aValue and its length is logged.
2052 bool PDFHexStringElement::Read(SvStream
& rStream
)
2055 rStream
.ReadChar(ch
);
2058 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2061 rStream
.ReadChar(ch
);
2064 while (!rStream
.IsEof())
2068 m_aValue
= aBuf
.makeStringAndClear();
2069 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: m_aValue length is " << m_aValue
.getLength());
2073 rStream
.ReadChar(ch
);
/// Read-only access to the hex string's text.
/// NOTE(review): presumably the m_aValue filled by Read() -- confirm.
2079 const OString
& PDFHexStringElement::GetValue() const
/// Reads a literal string from rStream into m_aValue.
///
/// The first character is expected to be '('. The string ends at a ')'
/// whose preceding character (nPrevCh) is not a backslash; the buffered text
/// is then moved into m_aValue.
///
/// NOTE(review): the first SAL_INFO message below names
/// "PDFHexStringElement::Read" although this is
/// PDFLiteralStringElement::Read -- looks like a copy/paste slip in the log
/// text.
2084 bool PDFLiteralStringElement::Read(SvStream
& rStream
)
2088 rStream
.ReadChar(ch
);
2091 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2095 rStream
.ReadChar(ch
);
2098 while (!rStream
.IsEof())
2100 if (ch
== ')' && nPrevCh
!= '\\')
2102 m_aValue
= aBuf
.makeStringAndClear();
2103 SAL_INFO("vcl.filter", "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue
<< "'");
2108 rStream
.ReadChar(ch
);
/// Read-only access to the literal string's text.
/// NOTE(review): presumably the m_aValue filled by Read() -- confirm.
2114 const OString
& PDFLiteralStringElement::GetValue() const
/// Creates a trailer element that belongs to rDoc.
/// NOTE(review): presumably rDoc is kept as m_rDoc, which Lookup() uses to
/// reach the document's elements -- confirm against the initializer list.
2119 PDFTrailerElement::PDFTrailerElement(PDFDocument
& rDoc
)
/// Records the current position of rStream in m_nOffset; no bytes are read
/// from the stream by the statement below.
2124 bool PDFTrailerElement::Read(SvStream
& rStream
)
2126 m_nOffset
= rStream
.Tell();
2130 PDFElement
* PDFTrailerElement::Lookup(const OString
& rDictionaryKey
)
2132 if (m_aDictionary
.empty())
2133 PDFDictionaryElement::Parse(m_rDoc
.GetElements(), this, m_aDictionary
);
2135 return PDFDictionaryElement::Lookup(m_aDictionary
, rDictionaryKey
);
/// Accessor for the location of the trailer.
/// NOTE(review): presumably the m_nOffset recorded by Read() -- confirm.
2138 sal_uInt64
PDFTrailerElement::GetLocation() const
/// Accessor for the numeric value as a double.
/// NOTE(review): presumably the m_fValue parsed by Read() -- confirm.
2143 double PDFNumberElement::GetValue() const
/// Creates an indirect object element with the given object and generation
/// numbers.
///
/// The numbers are stored as doubles; the number, dictionary, array and
/// stream element pointers start out as nullptr, and the dictionary offset
/// and length start out as 0.
2148 PDFObjectElement::PDFObjectElement(PDFDocument
& rDoc
, double fObjectValue
, double fGenerationValue
)
2150 m_fObjectValue(fObjectValue
),
2151 m_fGenerationValue(fGenerationValue
),
2152 m_pNumberElement(nullptr),
2153 m_nDictionaryOffset(0),
2154 m_nDictionaryLength(0),
2155 m_pDictionaryElement(nullptr),
2158 m_pArrayElement(nullptr),
2159 m_pStreamElement(nullptr)
/// PDFElement::Read() override for object elements: logs the object and
/// generation numbers. The stream argument is unnamed, so nothing is read
/// from it.
2163 bool PDFObjectElement::Read(SvStream
& /*rStream*/)
2165 SAL_INFO("vcl.filter", "PDFObjectElement::Read: " << m_fObjectValue
<< " " << m_fGenerationValue
<< " obj");
2169 PDFDictionaryElement::PDFDictionaryElement() = default;
/// Builds the key -> value map rDictionary for pThis from the token list
/// rElements.
///
/// pThis is the element whose dictionary is wanted (an object, a nested
/// dictionary, or a trailer). The return value is the index of the last
/// parsed element, which callers use to continue after a nested dictionary.
/// A non-empty rDictionary is detected up front. pThis is flagged via
/// setParsing() while the walk is running; a nested dictionary that reports
/// alreadyParsing() is not parsed again.
///
/// While walking the tokens from the position of pThis, dictionary start and
/// end tokens are depth-tracked, and name, number, array, reference, literal
/// string, boolean and hex string tokens are stored under the current key
/// name (aName) or pushed into the array currently being filled (pArray).
/// For pThisDictionary the offset of each key and, for most value kinds, the
/// length of the key-value pair are recorded as well.
2171 size_t PDFDictionaryElement::Parse(const std::vector
< std::unique_ptr
<PDFElement
> >& rElements
, PDFElement
* pThis
, std::map
<OString
, PDFElement
*>& rDictionary
)
2173 // The index of last parsed element, in case of nested dictionaries.
2176 if (!rDictionary
.empty())
2179 pThis
->setParsing(true);
2181 auto pThisObject
= dynamic_cast<PDFObjectElement
*>(pThis
);
2182 // This is set to non-nullptr here for nested dictionaries only.
2183 auto pThisDictionary
= dynamic_cast<PDFDictionaryElement
*>(pThis
);
2185 // Find out where the dictionary for this object starts.
2187 for (size_t i
= 0; i
< rElements
.size(); ++i
)
2189 if (rElements
[i
].get() == pThis
)
2197 sal_uInt64 nNameOffset
= 0;
2198 std::vector
<PDFNumberElement
*> aNumbers
;
2199 // The array value we're in -- if any.
2200 PDFArrayElement
* pArray
= nullptr;
2201 sal_uInt64 nDictionaryOffset
= 0;
2202 int nDictionaryDepth
= 0;
2203 // Toplevel dictionary found (not inside an array).
2204 bool bDictionaryFound
= false;
2205 // Toplevel array found (not inside a dictionary).
2206 bool bArrayFound
= false;
2207 for (size_t i
= nIndex
; i
< rElements
.size(); ++i
)
2209 // Dictionary tokens can be nested, track enter/leave.
2210 if (auto pDictionary
= dynamic_cast<PDFDictionaryElement
*>(rElements
[i
].get()))
2212 bDictionaryFound
= true;
2213 if (++nDictionaryDepth
== 1)
2215 // First dictionary start, track start offset.
2216 nDictionaryOffset
= pDictionary
->m_nLocation
;
2220 // The toplevel dictionary of the object.
2221 pThisObject
->SetDictionary(pDictionary
);
2222 pThisDictionary
= pDictionary
;
2223 pThisObject
->SetDictionaryOffset(nDictionaryOffset
);
2226 else if (!pDictionary
->alreadyParsing())
2228 // Nested dictionary.
2230 = PDFDictionaryElement::Parse(rElements
, pDictionary
, pDictionary
->m_aItems
);
2231 if (nexti
>= i
) // ensure we go forwards and not endlessly loop
2234 rDictionary
[aName
] = pDictionary
;
2240 if (auto pEndDictionary
= dynamic_cast<PDFEndDictionaryElement
*>(rElements
[i
].get()))
2242 if (--nDictionaryDepth
== 0)
2244 // Last dictionary end, track length and stop parsing.
2246 pThisObject
->SetDictionaryLength(pEndDictionary
->GetLocation() - nDictionaryOffset
);
// Name token: either a new key or the value of the pending key.
2252 auto pName
= dynamic_cast<PDFNameElement
*>(rElements
[i
].get());
2255 if (!aNumbers
.empty())
2257 PDFNumberElement
* pNumber
= aNumbers
.back();
2258 rDictionary
[aName
] = pNumber
;
2259 if (pThisDictionary
)
2261 pThisDictionary
->SetKeyOffset(aName
, nNameOffset
);
2262 pThisDictionary
->SetKeyValueLength(aName
, pNumber
->GetLocation() + pNumber
->GetLength() - nNameOffset
);
2268 if (aName
.isEmpty())
2271 aName
= pName
->GetValue();
2272 nNameOffset
= pName
->GetLocation();
2278 if (bDictionaryFound
)
2279 // Array inside dictionary.
2280 pArray
->PushBack(pName
);
2284 // Name-name key-value.
2285 rDictionary
[aName
] = pName
;
2286 if (pThisDictionary
)
2288 pThisDictionary
->SetKeyOffset(aName
, nNameOffset
);
2289 pThisDictionary
->SetKeyValueLength(aName
, pName
->GetLocation() + pName
->GetLength() - nNameOffset
);
2297 auto pArr
= dynamic_cast<PDFArrayElement
*>(rElements
[i
].get());
// End of the current array: flush pending numbers into it and store it.
2305 auto pEndArr
= dynamic_cast<PDFEndArrayElement
*>(rElements
[i
].get());
2306 if (pArray
&& pEndArr
)
2308 if (!aNumbers
.empty())
2310 for (auto& pNumber
: aNumbers
)
2311 pArray
->PushBack(pNumber
);
2314 rDictionary
[aName
] = pArray
;
2315 if (pThisDictionary
)
2317 pThisDictionary
->SetKeyOffset(aName
, nNameOffset
);
2318 // Include the ending ']' in the length of the key - (array)value pair length.
2319 pThisDictionary
->SetKeyValueLength(aName
, pEndArr
->GetOffset() - nNameOffset
+ 1);
2326 auto pReference
= dynamic_cast<PDFReferenceElement
*>(rElements
[i
].get());
2331 rDictionary
[aName
] = pReference
;
2332 if (pThisDictionary
)
2334 pThisDictionary
->SetKeyOffset(aName
, nNameOffset
);
2335 pThisDictionary
->SetKeyValueLength(aName
, pReference
->GetOffset() - nNameOffset
);
2341 if (bDictionaryFound
)
2342 // Array inside dictionary.
2343 pArray
->PushBack(pReference
);
2349 auto pLiteralString
= dynamic_cast<PDFLiteralStringElement
*>(rElements
[i
].get());
2352 rDictionary
[aName
] = pLiteralString
;
2353 if (pThisDictionary
)
2354 pThisDictionary
->SetKeyOffset(aName
, nNameOffset
);
2359 auto pBoolean
= dynamic_cast<PDFBooleanElement
*>(rElements
[i
].get());
2362 rDictionary
[aName
] = pBoolean
;
2363 if (pThisDictionary
)
2364 pThisDictionary
->SetKeyOffset(aName
, nNameOffset
);
2369 auto pHexString
= dynamic_cast<PDFHexStringElement
*>(rElements
[i
].get());
2374 rDictionary
[aName
] = pHexString
;
2375 if (pThisDictionary
)
2376 pThisDictionary
->SetKeyOffset(aName
, nNameOffset
);
2381 pArray
->PushBack(pHexString
);
2386 if (dynamic_cast<PDFEndObjectElement
*>(rElements
[i
].get()))
2389 // Just remember this, so that in case it's not a reference parameter,
2390 // we can handle it later.
2391 auto pNumber
= dynamic_cast<PDFNumberElement
*>(rElements
[i
].get());
2393 aNumbers
.push_back(pNumber
);
// A number that is still pending after the walk becomes the last key's value.
2396 if (!aNumbers
.empty())
2398 rDictionary
[aName
] = aNumbers
.back();
2399 if (pThisDictionary
)
2400 pThisDictionary
->SetKeyOffset(aName
, nNameOffset
);
2405 pThis
->setParsing(false);
/// Searches the already parsed map rDictionary for rKey using
/// std::map::find(); a key that is not present is detected by comparing the
/// iterator against end().
2410 PDFElement
* PDFDictionaryElement::Lookup(const std::map
<OString
, PDFElement
*>& rDictionary
, const OString
& rKey
)
2412 auto it
= rDictionary
.find(rKey
);
2413 if (it
== rDictionary
.end())
/// Resolves the reference stored under rDictionaryKey in m_aItems to the
/// object it points to.
///
/// The value is looked up in m_aItems and cast to a PDFReferenceElement; a
/// key without a reference value is logged as a warning. The result is what
/// the reference's own LookupObject() returns.
2419 PDFObjectElement
* PDFDictionaryElement::LookupObject(const OString
& rDictionaryKey
)
2421 auto pKey
= dynamic_cast<PDFReferenceElement
*>(PDFDictionaryElement::Lookup(m_aItems
, rDictionaryKey
));
2424 SAL_WARN("vcl.filter", "PDFDictionaryElement::LookupObject: no such key with reference value: " << rDictionaryKey
);
2428 return pKey
->LookupObject();
2431 PDFElement
* PDFDictionaryElement::LookupElement(const OString
& rDictionaryKey
)
2433 return PDFDictionaryElement::Lookup(m_aItems
, rDictionaryKey
);
2436 PDFElement
* PDFObjectElement::Lookup(const OString
& rDictionaryKey
)
2438 if (m_aDictionary
.empty())
2440 if (!m_aElements
.empty())
2441 // This is a stored object in an object stream.
2442 PDFDictionaryElement::Parse(m_aElements
, this, m_aDictionary
);
2444 // Normal object: elements are stored as members of the document itself.
2445 PDFDictionaryElement::Parse(m_rDoc
.GetElements(), this, m_aDictionary
);
2448 return PDFDictionaryElement::Lookup(m_aDictionary
, rDictionaryKey
);
/// Resolves the reference stored under rDictionaryKey in this object's
/// dictionary to the object it points to.
///
/// The value is obtained through Lookup() and cast to a PDFReferenceElement;
/// a key without a reference value is logged as a warning. The result is
/// what the reference's own LookupObject() returns.
2451 PDFObjectElement
* PDFObjectElement::LookupObject(const OString
& rDictionaryKey
)
2453 auto pKey
= dynamic_cast<PDFReferenceElement
*>(Lookup(rDictionaryKey
));
2456 SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: " << rDictionaryKey
);
2460 return pKey
->LookupObject();
2463 double PDFObjectElement::GetObjectValue() const
2465 return m_fObjectValue
;
2468 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset
)
2470 m_nDictionaryOffset
= nDictionaryOffset
;
2473 sal_uInt64
PDFObjectElement::GetDictionaryOffset()
2475 if (m_aDictionary
.empty())
2476 PDFDictionaryElement::Parse(m_rDoc
.GetElements(), this, m_aDictionary
);
2478 return m_nDictionaryOffset
;
2481 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset
)
2483 m_nArrayOffset
= nArrayOffset
;
2486 sal_uInt64
PDFObjectElement::GetArrayOffset()
2488 return m_nArrayOffset
;
2491 void PDFDictionaryElement::SetKeyOffset(const OString
& rKey
, sal_uInt64 nOffset
)
2493 m_aDictionaryKeyOffset
[rKey
] = nOffset
;
2496 void PDFDictionaryElement::SetKeyValueLength(const OString
& rKey
, sal_uInt64 nLength
)
2498 m_aDictionaryKeyValueLength
[rKey
] = nLength
;
/// Looks up rKey in m_aDictionaryKeyOffset.
/// NOTE(review): the return statements are not visible in this extract; presumably the stored
/// offset is returned when found and a fallback value otherwise -- confirm against the full file.
2501 sal_uInt64
PDFDictionaryElement::GetKeyOffset(const OString
& rKey
) const
2503 auto it
= m_aDictionaryKeyOffset
.find(rKey
);
2504 if (it
== m_aDictionaryKeyOffset
.end())
/// Looks up rKey in m_aDictionaryKeyValueLength.
/// NOTE(review): the return statements are not visible in this extract; presumably the stored
/// length is returned when found and a fallback value otherwise -- confirm against the full file.
2510 sal_uInt64
PDFDictionaryElement::GetKeyValueLength(const OString
& rKey
) const
2512 auto it
= m_aDictionaryKeyValueLength
.find(rKey
);
2513 if (it
== m_aDictionaryKeyValueLength
.end())
/// Read-only access to a key -> element map of this dictionary.
/// NOTE(review): body not visible in this extract; presumably returns m_aItems -- confirm.
2519 const std::map
<OString
, PDFElement
*>& PDFDictionaryElement::GetItems() const
2524 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength
)
2526 m_nDictionaryLength
= nDictionaryLength
;
2529 sal_uInt64
PDFObjectElement::GetDictionaryLength()
2531 if (m_aDictionary
.empty())
2532 PDFDictionaryElement::Parse(m_rDoc
.GetElements(), this, m_aDictionary
);
2534 return m_nDictionaryLength
;
2537 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength
)
2539 m_nArrayLength
= nArrayLength
;
2542 sal_uInt64
PDFObjectElement::GetArrayLength()
2544 return m_nArrayLength
;
2547 PDFDictionaryElement
* PDFObjectElement::GetDictionary()
2549 if (m_aDictionary
.empty())
2550 PDFDictionaryElement::Parse(m_rDoc
.GetElements(), this, m_aDictionary
);
2551 return m_pDictionaryElement
;
2554 void PDFObjectElement::SetDictionary(PDFDictionaryElement
* pDictionaryElement
)
2556 m_pDictionaryElement
= pDictionaryElement
;
2559 void PDFObjectElement::SetNumberElement(PDFNumberElement
* pNumberElement
)
2561 m_pNumberElement
= pNumberElement
;
2564 PDFNumberElement
* PDFObjectElement::GetNumberElement() const
2566 return m_pNumberElement
;
2569 const std::vector
<PDFReferenceElement
*>& PDFObjectElement::GetDictionaryReferences() const
2571 return m_aDictionaryReferences
;
2574 void PDFObjectElement::AddDictionaryReference(PDFReferenceElement
* pReference
)
2576 m_aDictionaryReferences
.push_back(pReference
);
2579 const std::map
<OString
, PDFElement
*>& PDFObjectElement::GetDictionaryItems()
2581 if (m_aDictionary
.empty())
2582 PDFDictionaryElement::Parse(m_rDoc
.GetElements(), this, m_aDictionary
);
2584 return m_aDictionary
;
2587 void PDFObjectElement::SetArray(PDFArrayElement
* pArrayElement
)
2589 m_pArrayElement
= pArrayElement
;
2592 void PDFObjectElement::SetStream(PDFStreamElement
* pStreamElement
)
2594 m_pStreamElement
= pStreamElement
;
2597 PDFStreamElement
* PDFObjectElement::GetStream() const
2599 return m_pStreamElement
;
2602 PDFArrayElement
* PDFObjectElement::GetArray() const
2604 return m_pArrayElement
;
2607 void PDFObjectElement::ParseStoredObjects()
2609 if (!m_pStreamElement
)
2611 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2615 auto pType
= dynamic_cast<PDFNameElement
*>(Lookup("Type"));
2616 if (!pType
|| pType
->GetValue() != "ObjStm")
2619 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2621 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected type: " << pType
->GetValue());
2625 auto pFilter
= dynamic_cast<PDFNameElement
*>(Lookup("Filter"));
2626 if (!pFilter
|| pFilter
->GetValue() != "FlateDecode")
2629 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2631 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter
->GetValue());
2635 auto pFirst
= dynamic_cast<PDFNumberElement
*>(Lookup("First"));
2638 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2642 auto pN
= dynamic_cast<PDFNumberElement
*>(Lookup("N"));
2645 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2648 size_t nN
= pN
->GetValue();
2650 auto pLength
= dynamic_cast<PDFNumberElement
*>(Lookup("Length"));
2653 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2656 size_t nLength
= pLength
->GetValue();
2658 // Read and decompress it.
2659 SvMemoryStream
& rEditBuffer
= m_rDoc
.GetEditBuffer();
2660 rEditBuffer
.Seek(m_pStreamElement
->GetOffset());
2661 std::vector
<char> aBuf(nLength
);
2662 rEditBuffer
.ReadBytes(aBuf
.data(), aBuf
.size());
2663 SvMemoryStream
aSource(aBuf
.data(), aBuf
.size(), StreamMode::READ
);
2664 SvMemoryStream aStream
;
2666 aZCodec
.BeginCompression();
2667 aZCodec
.Decompress(aSource
, aStream
);
2668 if (!aZCodec
.EndCompression())
2670 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2674 aStream
.Seek(STREAM_SEEK_TO_END
);
2675 nLength
= aStream
.Tell();
2677 std::vector
<size_t> aObjNums
;
2678 std::vector
<size_t> aOffsets
;
2679 std::vector
<size_t> aLengths
;
2680 // First iterate over and find out the lengths.
2681 for (size_t nObject
= 0; nObject
< nN
; ++nObject
)
2683 PDFNumberElement aObjNum
;
2684 if (!aObjNum
.Read(aStream
))
2686 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: failed to read object number");
2689 aObjNums
.push_back(aObjNum
.GetValue());
2691 PDFDocument::SkipWhitespace(aStream
);
2693 PDFNumberElement aByteOffset
;
2694 if (!aByteOffset
.Read(aStream
))
2696 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2699 aOffsets
.push_back(pFirst
->GetValue() + aByteOffset
.GetValue());
2701 if (aOffsets
.size() > 1)
2702 aLengths
.push_back(aOffsets
.back() - aOffsets
[aOffsets
.size() - 2]);
2703 if (nObject
+ 1 == nN
)
2704 aLengths
.push_back(nLength
- aOffsets
.back());
2706 PDFDocument::SkipWhitespace(aStream
);
2709 // Now create streams with the proper length and tokenize the data.
2710 for (size_t nObject
= 0; nObject
< nN
; ++nObject
)
2712 size_t nObjNum
= aObjNums
[nObject
];
2713 size_t nOffset
= aOffsets
[nObject
];
2714 size_t nLen
= aLengths
[nObject
];
2716 aStream
.Seek(nOffset
);
2717 m_aStoredElements
.push_back(o3tl::make_unique
<PDFObjectElement
>(m_rDoc
, nObjNum
, 0));
2718 PDFObjectElement
* pStored
= m_aStoredElements
.back().get();
2722 aStream
.ReadBytes(aBuf
.data(), aBuf
.size());
2723 SvMemoryStream
aStoredStream(aBuf
.data(), aBuf
.size(), StreamMode::READ
);
2725 m_rDoc
.Tokenize(aStoredStream
, TokenizeMode::STORED_OBJECT
, pStored
->GetStoredElements(), pStored
);
2726 // This is how references know the object is stored inside this object stream.
2727 m_rDoc
.SetIDObject(nObjNum
, pStored
);
2729 // Store the stream of the object in the object stream for later use.
2730 std::unique_ptr
<SvMemoryStream
> pStreamBuffer(new SvMemoryStream());
2731 aStoredStream
.Seek(0);
2732 pStreamBuffer
->WriteStream(aStoredStream
);
2733 pStored
->SetStreamBuffer(pStreamBuffer
);
/// Mutable access to a vector of owned PDFElement instances; ParseStoredObjects() passes it to
/// the tokenizer as the output list of a stored object.
/// NOTE(review): body not visible in this extract; presumably returns m_aElements -- confirm.
2737 std::vector
< std::unique_ptr
<PDFElement
> >& PDFObjectElement::GetStoredElements()
2742 SvMemoryStream
* PDFObjectElement::GetStreamBuffer() const
2744 return m_pStreamBuffer
.get();
2747 void PDFObjectElement::SetStreamBuffer(std::unique_ptr
<SvMemoryStream
>& pStreamBuffer
)
2749 m_pStreamBuffer
= std::move(pStreamBuffer
);
/// Returns a reference to a PDFDocument.
/// NOTE(review): body not visible in this extract; presumably returns m_rDoc -- confirm.
2752 PDFDocument
& PDFObjectElement::GetDocument()
/// Builds a reference from an object number element and a generation number element; the
/// numeric values of both are cached in m_fObjectValue / m_fGenerationValue.
/// NOTE(review): the rest of the initializer list and the body are not visible in this extract.
2757 PDFReferenceElement::PDFReferenceElement(PDFDocument
& rDoc
, PDFNumberElement
& rObject
, PDFNumberElement
& rGeneration
)
2759 m_fObjectValue(rObject
.GetValue()),
2760 m_fGenerationValue(rGeneration
.GetValue()),
/// Returns a reference to a PDFNumberElement.
/// NOTE(review): body not visible in this extract; presumably the object number element passed
/// to the constructor -- confirm.
2765 PDFNumberElement
& PDFReferenceElement::GetObjectElement() const
/// Logs the "<object> <generation> R" reference and records the current stream position in
/// m_nOffset; nothing is read from rStream in the visible statements.
/// NOTE(review): the return statement is not visible in this extract.
2770 bool PDFReferenceElement::Read(SvStream
& rStream
)
2772 SAL_INFO("vcl.filter", "PDFReferenceElement::Read: " << m_fObjectValue
<< " " << m_fGenerationValue
<< " R");
2773 m_nOffset
= rStream
.Tell();
/// NOTE(review): body not visible in this extract; presumably returns the m_nOffset recorded
/// by Read() -- confirm.
2777 sal_uInt64
PDFReferenceElement::GetOffset() const
2782 double PDFReferenceElement::LookupNumber(SvStream
& rStream
) const
2784 size_t nOffset
= m_rDoc
.GetObjectOffset(m_fObjectValue
);
2787 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #" << m_fObjectValue
);
2791 sal_uInt64 nOrigPos
= rStream
.Tell();
2792 comphelper::ScopeGuard
g([&]()
2794 rStream
.Seek(nOrigPos
);
2797 rStream
.Seek(nOffset
);
2799 PDFDocument::SkipWhitespace(rStream
);
2800 PDFNumberElement aNumber
;
2801 bool bRet
= aNumber
.Read(rStream
);
2802 if (!bRet
|| aNumber
.GetValue() != m_fObjectValue
)
2804 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: offset points to not matching object");
2810 PDFDocument::SkipWhitespace(rStream
);
2811 PDFNumberElement aNumber
;
2812 bool bRet
= aNumber
.Read(rStream
);
2813 if (!bRet
|| aNumber
.GetValue() != m_fGenerationValue
)
2815 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2821 PDFDocument::SkipWhitespace(rStream
);
2822 OString aKeyword
= PDFDocument::ReadKeyword(rStream
);
2823 if (aKeyword
!= "obj")
2825 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2830 PDFDocument::SkipWhitespace(rStream
);
2831 PDFNumberElement aNumber
;
2832 if (!aNumber
.Read(rStream
))
2834 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: failed to read referenced number");
2838 return aNumber
.GetValue();
2841 PDFObjectElement
* PDFReferenceElement::LookupObject()
2843 return m_rDoc
.LookupObject(m_fObjectValue
);
2846 PDFObjectElement
* PDFDocument::LookupObject(size_t nObjectNumber
)
2848 auto itIDObjects
= m_aIDObjects
.find(nObjectNumber
);
2850 if (itIDObjects
!= m_aIDObjects
.end())
2851 return itIDObjects
->second
;
2853 SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber
);
2857 SvMemoryStream
& PDFDocument::GetEditBuffer()
2859 return m_aEditBuffer
;
2862 int PDFReferenceElement::GetObjectValue() const
2864 return m_fObjectValue
;
2867 int PDFReferenceElement::GetGenerationValue() const
2869 return m_fGenerationValue
;
2872 bool PDFDictionaryElement::Read(SvStream
& rStream
)
2875 rStream
.ReadChar(ch
);
2878 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch
);
2882 if (rStream
.IsEof())
2884 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2888 rStream
.ReadChar(ch
);
2891 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch
);
2895 m_nLocation
= rStream
.Tell();
2897 SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
/// Compiler-generated default constructor, defined out of line.
2902 PDFEndDictionaryElement::PDFEndDictionaryElement() = default;
/// NOTE(review): body not visible in this extract; presumably returns the m_nLocation recorded
/// by Read() (offset before the '>>' token) -- confirm.
2904 sal_uInt64
PDFEndDictionaryElement::GetLocation() const
2909 bool PDFEndDictionaryElement::Read(SvStream
& rStream
)
2911 m_nLocation
= rStream
.Tell();
2913 rStream
.ReadChar(ch
);
2916 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch
);
2920 if (rStream
.IsEof())
2922 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2926 rStream
.ReadChar(ch
);
2929 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch
);
2933 SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
/// Default constructor. NOTE(review): initializer list and body are not visible in this extract.
2938 PDFNameElement::PDFNameElement()
2944 bool PDFNameElement::Read(SvStream
& rStream
)
2947 rStream
.ReadChar(ch
);
2950 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch
);
2953 m_nLocation
= rStream
.Tell();
2955 if (rStream
.IsEof())
2957 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2961 // Read till the first white-space.
2963 rStream
.ReadChar(ch
);
2964 while (!rStream
.IsEof())
2966 if (isspace(ch
) || ch
== '/' || ch
== '[' || ch
== ']' || ch
== '<' || ch
== '>' || ch
== '(')
2968 rStream
.SeekRel(-1);
2969 m_aValue
= aBuf
.makeStringAndClear();
2970 SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue
<< "'");
2974 rStream
.ReadChar(ch
);
/// NOTE(review): body not visible in this extract; presumably returns the m_aValue filled in
/// by Read() -- confirm.
2980 const OString
& PDFNameElement::GetValue() const
/// NOTE(review): body not visible in this extract; presumably returns the m_nLocation recorded
/// by Read() -- confirm.
2985 sal_uInt64
PDFNameElement::GetLocation() const
/// NOTE(review): body not visible in this extract; presumably the length of the name read by
/// Read() -- confirm.
2990 sal_uInt64
PDFNameElement::GetLength() const
/// Creates a stream element that will read nLength bytes in Read().
/// NOTE(review): the rest of the initializer list and the body are not visible in this extract.
2995 PDFStreamElement::PDFStreamElement(size_t nLength
)
2996 : m_nLength(nLength
),
3001 bool PDFStreamElement::Read(SvStream
& rStream
)
3003 SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength
);
3004 m_nOffset
= rStream
.Tell();
3005 std::vector
<unsigned char> aBytes(m_nLength
);
3006 rStream
.ReadBytes(aBytes
.data(), aBytes
.size());
3007 m_aMemory
.WriteBytes(aBytes
.data(), aBytes
.size());
3009 return rStream
.good();
/// NOTE(review): body not visible in this extract; presumably returns the m_aMemory buffer
/// filled by Read() -- confirm.
3012 SvMemoryStream
& PDFStreamElement::GetMemory()
/// NOTE(review): body not visible in this extract; presumably returns the m_nOffset recorded
/// by Read() -- confirm.
3017 sal_uInt64
PDFStreamElement::GetOffset() const
/// Read() for the 'endstream' keyword; the stream parameter is intentionally unnamed (unused).
/// NOTE(review): body not visible in this extract.
3022 bool PDFEndStreamElement::Read(SvStream
& /*rStream*/)
/// Read() for the 'endobj' keyword; the stream parameter is intentionally unnamed (unused).
/// NOTE(review): body not visible in this extract.
3027 bool PDFEndObjectElement::Read(SvStream
& /*rStream*/)
/// Creates an array element and remembers pObject in m_pObject.
/// NOTE(review): the body is not visible in this extract.
3032 PDFArrayElement::PDFArrayElement(PDFObjectElement
* pObject
)
3033 : m_pObject(pObject
)
3037 bool PDFArrayElement::Read(SvStream
& rStream
)
3040 rStream
.ReadChar(ch
);
3043 SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch
);
3046 m_nOffset
= rStream
.Tell();
3048 SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
3053 void PDFArrayElement::PushBack(PDFElement
* pElement
)
3056 SAL_INFO("vcl.filter", "PDFArrayElement::PushBack: object is " << m_pObject
->GetObjectValue());
3057 m_aElements
.push_back(pElement
);
/// Read-only access to a vector of element pointers.
/// NOTE(review): body not visible in this extract; presumably returns the m_aElements filled
/// by PushBack() -- confirm.
3060 const std::vector
<PDFElement
*>& PDFArrayElement::GetElements()
/// Compiler-generated default constructor, defined out of line.
3065 PDFEndArrayElement::PDFEndArrayElement() = default;
3067 bool PDFEndArrayElement::Read(SvStream
& rStream
)
3069 m_nOffset
= rStream
.Tell();
3071 rStream
.ReadChar(ch
);
3074 SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch
);
3078 SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
/// NOTE(review): body not visible in this extract; presumably returns the m_nOffset recorded
/// by Read() (location before the ']' token) -- confirm.
3083 sal_uInt64
PDFEndArrayElement::GetOffset() const
3088 } // namespace filter
3091 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */