/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
#include <vcl/filter/pdfdocument.hxx>

#include <algorithm>
#include <cassert>
#include <memory>
#include <vector>

#include <com/sun/star/uno/Sequence.hxx>
#include <com/sun/star/security/XCertificate.hpp>

#include <comphelper/scopeguard.hxx>
#include <comphelper/string.hxx>
#include <rtl/character.hxx>
#include <rtl/strbuf.hxx>
#include <rtl/string.hxx>
#include <sal/log.hxx>
#include <sal/types.h>
#include <svl/cryptosign.hxx>
#include <tools/zcodec.hxx>
#include <vcl/pdfwriter.hxx>
#include <o3tl/safeint.hxx>

#include <pdf/objectcopier.hxx>

using namespace com::sun::star;
37 XRefEntry::XRefEntry() = default;
39 PDFDocument::PDFDocument() = default;
41 PDFDocument::~PDFDocument() = default;
43 bool PDFDocument::RemoveSignature(size_t nPosition
)
45 std::vector
<PDFObjectElement
*> aSignatures
= GetSignatureWidgets();
46 if (nPosition
>= aSignatures
.size())
48 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
52 if (aSignatures
.size() != m_aEOFs
.size() - 1)
54 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
55 "and incremental updates");
59 // The EOF offset is the end of the original file, without the signature at
61 m_aEditBuffer
.Seek(m_aEOFs
[nPosition
]);
62 // Drop all bytes after the current position.
63 m_aEditBuffer
.SetStreamSize(m_aEditBuffer
.Tell() + 1);
65 return m_aEditBuffer
.good();
68 sal_Int32
PDFDocument::createObject()
70 sal_Int32 nObject
= m_aXRef
.size();
71 m_aXRef
[nObject
] = XRefEntry();
75 bool PDFDocument::updateObject(sal_Int32 nObject
)
77 if (o3tl::make_unsigned(nObject
) >= m_aXRef
.size())
79 SAL_WARN("vcl.filter", "PDFDocument::updateObject: invalid nObject");
84 aEntry
.SetOffset(m_aEditBuffer
.Tell());
85 aEntry
.SetDirty(true);
86 m_aXRef
[nObject
] = aEntry
;
90 bool PDFDocument::writeBuffer(const void* pBuffer
, sal_uInt64 nBytes
)
92 std::size_t nWritten
= m_aEditBuffer
.WriteBytes(pBuffer
, nBytes
);
93 return nWritten
== nBytes
;
96 void PDFDocument::SetSignatureLine(const std::vector
<sal_Int8
>& rSignatureLine
)
98 m_aSignatureLine
= rSignatureLine
;
101 void PDFDocument::SetSignaturePage(size_t nPage
) { m_nSignaturePage
= nPage
; }
103 sal_uInt32
PDFDocument::GetNextSignature()
106 for (const auto& pSignature
: GetSignatureWidgets())
108 auto pT
= dynamic_cast<PDFLiteralStringElement
*>(pSignature
->Lookup("T"));
112 const OString
& rValue
= pT
->GetValue();
113 const OString aPrefix
= "Signature";
114 if (!rValue
.startsWith(aPrefix
))
117 nRet
= std::max(nRet
, rValue
.copy(aPrefix
.getLength()).toUInt32());
123 sal_Int32
PDFDocument::WriteSignatureObject(const OUString
& rDescription
, bool bAdES
,
124 sal_uInt64
& rLastByteRangeOffset
,
125 sal_Int64
& rContentOffset
)
127 // Write signature object.
128 sal_Int32 nSignatureId
= m_aXRef
.size();
129 XRefEntry aSignatureEntry
;
130 aSignatureEntry
.SetOffset(m_aEditBuffer
.Tell());
131 aSignatureEntry
.SetDirty(true);
132 m_aXRef
[nSignatureId
] = aSignatureEntry
;
133 OStringBuffer aSigBuffer
;
134 aSigBuffer
.append(nSignatureId
);
135 aSigBuffer
.append(" 0 obj\n");
136 aSigBuffer
.append("<</Contents <");
137 rContentOffset
= aSignatureEntry
.GetOffset() + aSigBuffer
.getLength();
138 // Reserve space for the PKCS#7 object.
139 OStringBuffer
aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH
);
140 comphelper::string::padToLength(aContentFiller
, MAX_SIGNATURE_CONTENT_LENGTH
, '0');
141 aSigBuffer
.append(aContentFiller
.makeStringAndClear());
142 aSigBuffer
.append(">\n/Type/Sig/SubFilter");
144 aSigBuffer
.append("/ETSI.CAdES.detached");
146 aSigBuffer
.append("/adbe.pkcs7.detached");
149 aSigBuffer
.append(" /M (");
150 aSigBuffer
.append(vcl::PDFWriter::GetDateTime());
151 aSigBuffer
.append(")");
153 // Byte range: we can write offset1-length1 and offset2 right now, will
154 // write length2 later.
155 aSigBuffer
.append(" /ByteRange [ 0 ");
156 // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
157 aSigBuffer
.append(rContentOffset
- 1);
158 aSigBuffer
.append(" ");
159 aSigBuffer
.append(rContentOffset
+ MAX_SIGNATURE_CONTENT_LENGTH
+ 1);
160 aSigBuffer
.append(" ");
161 rLastByteRangeOffset
= aSignatureEntry
.GetOffset() + aSigBuffer
.getLength();
162 // We don't know how many bytes we need for the last ByteRange value, this
164 OStringBuffer aByteRangeFiller
;
165 comphelper::string::padToLength(aByteRangeFiller
, 100, ' ');
166 aSigBuffer
.append(aByteRangeFiller
.makeStringAndClear());
167 // Finish the Sig obj.
168 aSigBuffer
.append(" /Filter/Adobe.PPKMS");
170 if (!rDescription
.isEmpty())
172 aSigBuffer
.append("/Reason<");
173 vcl::PDFWriter::AppendUnicodeTextString(rDescription
, aSigBuffer
);
174 aSigBuffer
.append(">");
177 aSigBuffer
.append(" >>\nendobj\n\n");
178 m_aEditBuffer
.WriteOString(aSigBuffer
.toString());
183 sal_Int32
PDFDocument::WriteAppearanceObject(tools::Rectangle
& rSignatureRectangle
)
185 PDFDocument aPDFDocument
;
186 filter::PDFObjectElement
* pPage
= nullptr;
187 std::vector
<filter::PDFObjectElement
*> aContentStreams
;
189 if (!m_aSignatureLine
.empty())
191 // Parse the PDF data of signature line: we can set the signature rectangle to non-empty
193 SvMemoryStream aPDFStream
;
194 aPDFStream
.WriteBytes(m_aSignatureLine
.data(), m_aSignatureLine
.size());
196 if (!aPDFDocument
.Read(aPDFStream
))
198 SAL_WARN("vcl.filter",
199 "PDFDocument::WriteAppearanceObject: failed to read the PDF document");
203 std::vector
<filter::PDFObjectElement
*> aPages
= aPDFDocument
.GetPages();
206 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no pages");
213 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no page");
217 // Calculate the bounding box.
218 PDFElement
* pMediaBox
= pPage
->Lookup("MediaBox");
219 auto pMediaBoxArray
= dynamic_cast<PDFArrayElement
*>(pMediaBox
);
220 if (!pMediaBoxArray
|| pMediaBoxArray
->GetElements().size() < 4)
222 SAL_WARN("vcl.filter",
223 "PDFDocument::WriteAppearanceObject: MediaBox is not an array of 4");
226 const std::vector
<PDFElement
*>& rMediaBoxElements
= pMediaBoxArray
->GetElements();
227 auto pWidth
= dynamic_cast<PDFNumberElement
*>(rMediaBoxElements
[2]);
230 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no width");
233 rSignatureRectangle
.setWidth(pWidth
->GetValue());
234 auto pHeight
= dynamic_cast<PDFNumberElement
*>(rMediaBoxElements
[3]);
237 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no height");
240 rSignatureRectangle
.setHeight(pHeight
->GetValue());
242 if (PDFObjectElement
* pContentStream
= pPage
->LookupObject("Contents"))
244 aContentStreams
.push_back(pContentStream
);
247 if (aContentStreams
.empty())
249 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no content stream");
253 m_aSignatureLine
.clear();
255 // Write appearance object: allocate an ID.
256 sal_Int32 nAppearanceId
= m_aXRef
.size();
257 m_aXRef
[nAppearanceId
] = XRefEntry();
259 // Write the object content.
260 SvMemoryStream aEditBuffer
;
261 aEditBuffer
.WriteUInt32AsString(nAppearanceId
);
262 aEditBuffer
.WriteCharPtr(" 0 obj\n");
263 aEditBuffer
.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
265 PDFObjectCopier
aCopier(*this);
266 if (!aContentStreams
.empty())
268 assert(pPage
&& "aContentStreams is only filled if there was a pPage");
269 OStringBuffer aBuffer
;
270 aCopier
.copyPageResources(pPage
, aBuffer
);
271 aEditBuffer
.WriteOString(aBuffer
.makeStringAndClear());
274 aEditBuffer
.WriteCharPtr("/BBox[0 0 ");
275 aEditBuffer
.WriteOString(OString::number(rSignatureRectangle
.getWidth()));
276 aEditBuffer
.WriteCharPtr(" ");
277 aEditBuffer
.WriteOString(OString::number(rSignatureRectangle
.getHeight()));
278 aEditBuffer
.WriteCharPtr("]\n/Length ");
280 // Add the object to the doc-level edit buffer and update the offset.
281 SvMemoryStream aStream
;
282 bool bCompressed
= false;
283 sal_Int32 nLength
= 0;
284 if (!aContentStreams
.empty())
286 nLength
= PDFObjectCopier::copyPageStreams(aContentStreams
, aStream
, bCompressed
);
288 aEditBuffer
.WriteOString(OString::number(nLength
));
291 aEditBuffer
.WriteOString(" /Filter/FlateDecode");
294 aEditBuffer
.WriteCharPtr("\n>>\n");
296 aEditBuffer
.WriteCharPtr("stream\n");
298 // Copy the original page streams to the form XObject stream.
300 aEditBuffer
.WriteStream(aStream
);
302 aEditBuffer
.WriteCharPtr("\nendstream\nendobj\n\n");
305 XRefEntry aAppearanceEntry
;
306 aAppearanceEntry
.SetOffset(m_aEditBuffer
.Tell());
307 aAppearanceEntry
.SetDirty(true);
308 m_aXRef
[nAppearanceId
] = aAppearanceEntry
;
309 m_aEditBuffer
.WriteStream(aEditBuffer
);
311 return nAppearanceId
;
314 sal_Int32
PDFDocument::WriteAnnotObject(PDFObjectElement
const& rFirstPage
, sal_Int32 nSignatureId
,
315 sal_Int32 nAppearanceId
,
316 const tools::Rectangle
& rSignatureRectangle
)
318 // Decide what identifier to use for the new signature.
319 sal_uInt32 nNextSignature
= GetNextSignature();
321 // Write the Annot object, references nSignatureId and nAppearanceId.
322 sal_Int32 nAnnotId
= m_aXRef
.size();
323 XRefEntry aAnnotEntry
;
324 aAnnotEntry
.SetOffset(m_aEditBuffer
.Tell());
325 aAnnotEntry
.SetDirty(true);
326 m_aXRef
[nAnnotId
] = aAnnotEntry
;
327 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
328 m_aEditBuffer
.WriteCharPtr(" 0 obj\n");
329 m_aEditBuffer
.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
330 m_aEditBuffer
.WriteCharPtr("/Rect[0 0 ");
331 m_aEditBuffer
.WriteOString(OString::number(rSignatureRectangle
.getWidth()));
332 m_aEditBuffer
.WriteCharPtr(" ");
333 m_aEditBuffer
.WriteOString(OString::number(rSignatureRectangle
.getHeight()));
334 m_aEditBuffer
.WriteCharPtr("]\n");
335 m_aEditBuffer
.WriteCharPtr("/FT/Sig\n");
336 m_aEditBuffer
.WriteCharPtr("/P ");
337 m_aEditBuffer
.WriteUInt32AsString(rFirstPage
.GetObjectValue());
338 m_aEditBuffer
.WriteCharPtr(" 0 R\n");
339 m_aEditBuffer
.WriteCharPtr("/T(Signature");
340 m_aEditBuffer
.WriteUInt32AsString(nNextSignature
);
341 m_aEditBuffer
.WriteCharPtr(")\n");
342 m_aEditBuffer
.WriteCharPtr("/V ");
343 m_aEditBuffer
.WriteUInt32AsString(nSignatureId
);
344 m_aEditBuffer
.WriteCharPtr(" 0 R\n");
345 m_aEditBuffer
.WriteCharPtr("/DV ");
346 m_aEditBuffer
.WriteUInt32AsString(nSignatureId
);
347 m_aEditBuffer
.WriteCharPtr(" 0 R\n");
348 m_aEditBuffer
.WriteCharPtr("/AP<<\n/N ");
349 m_aEditBuffer
.WriteUInt32AsString(nAppearanceId
);
350 m_aEditBuffer
.WriteCharPtr(" 0 R\n>>\n");
351 m_aEditBuffer
.WriteCharPtr(">>\nendobj\n\n");
356 bool PDFDocument::WritePageObject(PDFObjectElement
& rFirstPage
, sal_Int32 nAnnotId
)
358 PDFElement
* pAnnots
= rFirstPage
.Lookup("Annots");
359 auto pAnnotsReference
= dynamic_cast<PDFReferenceElement
*>(pAnnots
);
360 if (pAnnotsReference
)
362 // Write the updated Annots key of the Page object.
363 PDFObjectElement
* pAnnotsObject
= pAnnotsReference
->LookupObject();
366 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
370 sal_uInt32 nAnnotsId
= pAnnotsObject
->GetObjectValue();
371 m_aXRef
[nAnnotsId
].SetType(XRefEntryType::NOT_COMPRESSED
);
372 m_aXRef
[nAnnotsId
].SetOffset(m_aEditBuffer
.Tell());
373 m_aXRef
[nAnnotsId
].SetDirty(true);
374 m_aEditBuffer
.WriteUInt32AsString(nAnnotsId
);
375 m_aEditBuffer
.WriteCharPtr(" 0 obj\n[");
377 // Write existing references.
378 PDFArrayElement
* pArray
= pAnnotsObject
->GetArray();
381 SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
385 for (size_t i
= 0; i
< pArray
->GetElements().size(); ++i
)
387 auto pReference
= dynamic_cast<PDFReferenceElement
*>(pArray
->GetElements()[i
]);
392 m_aEditBuffer
.WriteCharPtr(" ");
393 m_aEditBuffer
.WriteUInt32AsString(pReference
->GetObjectValue());
394 m_aEditBuffer
.WriteCharPtr(" 0 R");
396 // Write our reference.
397 m_aEditBuffer
.WriteCharPtr(" ");
398 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
399 m_aEditBuffer
.WriteCharPtr(" 0 R");
401 m_aEditBuffer
.WriteCharPtr("]\nendobj\n\n");
405 // Write the updated first page object, references nAnnotId.
406 sal_uInt32 nFirstPageId
= rFirstPage
.GetObjectValue();
407 if (nFirstPageId
>= m_aXRef
.size())
409 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
412 m_aXRef
[nFirstPageId
].SetOffset(m_aEditBuffer
.Tell());
413 m_aXRef
[nFirstPageId
].SetDirty(true);
414 m_aEditBuffer
.WriteUInt32AsString(nFirstPageId
);
415 m_aEditBuffer
.WriteCharPtr(" 0 obj\n");
416 m_aEditBuffer
.WriteCharPtr("<<");
417 auto pAnnotsArray
= dynamic_cast<PDFArrayElement
*>(pAnnots
);
420 // No Annots key, just write the key with a single reference.
421 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
422 + rFirstPage
.GetDictionaryOffset(),
423 rFirstPage
.GetDictionaryLength());
424 m_aEditBuffer
.WriteCharPtr("/Annots[");
425 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
426 m_aEditBuffer
.WriteCharPtr(" 0 R]");
430 // Annots key is already there, insert our reference at the end.
431 PDFDictionaryElement
* pDictionary
= rFirstPage
.GetDictionary();
433 // Offset right before the end of the Annots array.
434 sal_uInt64 nAnnotsEndOffset
= pDictionary
->GetKeyOffset("Annots")
435 + pDictionary
->GetKeyValueLength("Annots") - 1;
436 // Length of beginning of the dictionary -> Annots end.
437 sal_uInt64 nAnnotsBeforeEndLength
= nAnnotsEndOffset
- rFirstPage
.GetDictionaryOffset();
438 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
439 + rFirstPage
.GetDictionaryOffset(),
440 nAnnotsBeforeEndLength
);
441 m_aEditBuffer
.WriteCharPtr(" ");
442 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
443 m_aEditBuffer
.WriteCharPtr(" 0 R");
444 // Length of Annots end -> end of the dictionary.
445 sal_uInt64 nAnnotsAfterEndLength
= rFirstPage
.GetDictionaryOffset()
446 + rFirstPage
.GetDictionaryLength()
448 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
450 nAnnotsAfterEndLength
);
452 m_aEditBuffer
.WriteCharPtr(">>");
453 m_aEditBuffer
.WriteCharPtr("\nendobj\n\n");
459 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId
, PDFReferenceElement
*& pRoot
)
462 pRoot
= dynamic_cast<PDFReferenceElement
*>(m_pXRefStream
->Lookup("Root"));
467 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
470 pRoot
= dynamic_cast<PDFReferenceElement
*>(m_pTrailer
->Lookup("Root"));
474 SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
477 PDFObjectElement
* pCatalog
= pRoot
->LookupObject();
480 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
483 sal_uInt32 nCatalogId
= pCatalog
->GetObjectValue();
484 if (nCatalogId
>= m_aXRef
.size())
486 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
489 PDFElement
* pAcroForm
= pCatalog
->Lookup("AcroForm");
490 auto pAcroFormReference
= dynamic_cast<PDFReferenceElement
*>(pAcroForm
);
491 if (pAcroFormReference
)
493 // Write the updated AcroForm key of the Catalog object.
494 PDFObjectElement
* pAcroFormObject
= pAcroFormReference
->LookupObject();
495 if (!pAcroFormObject
)
497 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
501 sal_uInt32 nAcroFormId
= pAcroFormObject
->GetObjectValue();
502 m_aXRef
[nAcroFormId
].SetType(XRefEntryType::NOT_COMPRESSED
);
503 m_aXRef
[nAcroFormId
].SetOffset(m_aEditBuffer
.Tell());
504 m_aXRef
[nAcroFormId
].SetDirty(true);
505 m_aEditBuffer
.WriteUInt32AsString(nAcroFormId
);
506 m_aEditBuffer
.WriteCharPtr(" 0 obj\n");
508 // If this is nullptr, then the AcroForm object is not in an object stream.
509 SvMemoryStream
* pStreamBuffer
= pAcroFormObject
->GetStreamBuffer();
511 if (!pAcroFormObject
->Lookup("Fields"))
513 SAL_WARN("vcl.filter",
514 "PDFDocument::Sign: AcroForm object without required Fields key");
518 PDFDictionaryElement
* pAcroFormDictionary
= pAcroFormObject
->GetDictionary();
519 if (!pAcroFormDictionary
)
521 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
525 // Offset right before the end of the Fields array.
526 sal_uInt64 nFieldsEndOffset
= pAcroFormDictionary
->GetKeyOffset("Fields")
527 + pAcroFormDictionary
->GetKeyValueLength("Fields")
530 // Length of beginning of the object dictionary -> Fields end.
531 sal_uInt64 nFieldsBeforeEndLength
= nFieldsEndOffset
;
533 m_aEditBuffer
.WriteBytes(pStreamBuffer
->GetData(), nFieldsBeforeEndLength
);
536 nFieldsBeforeEndLength
-= pAcroFormObject
->GetDictionaryOffset();
537 m_aEditBuffer
.WriteCharPtr("<<");
538 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
539 + pAcroFormObject
->GetDictionaryOffset(),
540 nFieldsBeforeEndLength
);
543 // Append our reference at the end of the Fields array.
544 m_aEditBuffer
.WriteCharPtr(" ");
545 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
546 m_aEditBuffer
.WriteCharPtr(" 0 R");
548 // Length of Fields end -> end of the object dictionary.
551 sal_uInt64 nFieldsAfterEndLength
= pStreamBuffer
->GetSize() - nFieldsEndOffset
;
552 m_aEditBuffer
.WriteBytes(static_cast<const char*>(pStreamBuffer
->GetData())
554 nFieldsAfterEndLength
);
558 sal_uInt64 nFieldsAfterEndLength
= pAcroFormObject
->GetDictionaryOffset()
559 + pAcroFormObject
->GetDictionaryLength()
561 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
563 nFieldsAfterEndLength
);
564 m_aEditBuffer
.WriteCharPtr(">>");
567 m_aEditBuffer
.WriteCharPtr("\nendobj\n\n");
571 // Write the updated Catalog object, references nAnnotId.
572 auto pAcroFormDictionary
= dynamic_cast<PDFDictionaryElement
*>(pAcroForm
);
573 m_aXRef
[nCatalogId
].SetOffset(m_aEditBuffer
.Tell());
574 m_aXRef
[nCatalogId
].SetDirty(true);
575 m_aEditBuffer
.WriteUInt32AsString(nCatalogId
);
576 m_aEditBuffer
.WriteCharPtr(" 0 obj\n");
577 m_aEditBuffer
.WriteCharPtr("<<");
578 if (!pAcroFormDictionary
)
580 // No AcroForm key, assume no signatures.
581 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
582 + pCatalog
->GetDictionaryOffset(),
583 pCatalog
->GetDictionaryLength());
584 m_aEditBuffer
.WriteCharPtr("/AcroForm<</Fields[\n");
585 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
586 m_aEditBuffer
.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
590 // AcroForm key is already there, insert our reference at the Fields end.
591 auto it
= pAcroFormDictionary
->GetItems().find("Fields");
592 if (it
== pAcroFormDictionary
->GetItems().end())
594 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
598 auto pFields
= dynamic_cast<PDFArrayElement
*>(it
->second
);
601 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
605 // Offset right before the end of the Fields array.
606 sal_uInt64 nFieldsEndOffset
= pAcroFormDictionary
->GetKeyOffset("Fields")
607 + pAcroFormDictionary
->GetKeyValueLength("Fields") - 1;
608 // Length of beginning of the Catalog dictionary -> Fields end.
609 sal_uInt64 nFieldsBeforeEndLength
= nFieldsEndOffset
- pCatalog
->GetDictionaryOffset();
610 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
611 + pCatalog
->GetDictionaryOffset(),
612 nFieldsBeforeEndLength
);
613 m_aEditBuffer
.WriteCharPtr(" ");
614 m_aEditBuffer
.WriteUInt32AsString(nAnnotId
);
615 m_aEditBuffer
.WriteCharPtr(" 0 R");
616 // Length of Fields end -> end of the Catalog dictionary.
617 sal_uInt64 nFieldsAfterEndLength
= pCatalog
->GetDictionaryOffset()
618 + pCatalog
->GetDictionaryLength() - nFieldsEndOffset
;
619 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
621 nFieldsAfterEndLength
);
623 m_aEditBuffer
.WriteCharPtr(">>\nendobj\n\n");
629 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset
, PDFReferenceElement
const* pRoot
)
633 // Write the xref stream.
634 // This is a bit meta: the xref stream stores its own offset.
635 sal_Int32 nXRefStreamId
= m_aXRef
.size();
636 XRefEntry aXRefStreamEntry
;
637 aXRefStreamEntry
.SetOffset(nXRefOffset
);
638 aXRefStreamEntry
.SetDirty(true);
639 m_aXRef
[nXRefStreamId
] = aXRefStreamEntry
;
641 // Write stream data.
642 SvMemoryStream aXRefStream
;
643 const size_t nOffsetLen
= 3;
644 // 3 additional bytes: predictor, the first and the third field.
645 const size_t nLineLength
= nOffsetLen
+ 3;
646 // This is the line as it appears before tweaking according to the predictor.
647 std::vector
<unsigned char> aOrigLine(nLineLength
);
648 // This is the previous line.
649 std::vector
<unsigned char> aPrevLine(nLineLength
);
650 // This is the line as written to the stream.
651 std::vector
<unsigned char> aFilteredLine(nLineLength
);
652 for (const auto& rXRef
: m_aXRef
)
654 const XRefEntry
& rEntry
= rXRef
.second
;
656 if (!rEntry
.GetDirty())
661 // PNG prediction: up (on all rows).
662 aOrigLine
[nPos
++] = 2;
665 unsigned char nType
= 0;
666 switch (rEntry
.GetType())
668 case XRefEntryType::FREE
:
671 case XRefEntryType::NOT_COMPRESSED
:
674 case XRefEntryType::COMPRESSED
:
678 aOrigLine
[nPos
++] = nType
;
681 for (size_t i
= 0; i
< nOffsetLen
; ++i
)
683 size_t nByte
= nOffsetLen
- i
- 1;
684 // Fields requiring more than one byte are stored with the
685 // high-order byte first.
686 unsigned char nCh
= (rEntry
.GetOffset() & (0xff << (nByte
* 8))) >> (nByte
* 8);
687 aOrigLine
[nPos
++] = nCh
;
691 aOrigLine
[nPos
++] = 0;
693 // Now apply the predictor.
694 aFilteredLine
[0] = aOrigLine
[0];
695 for (size_t i
= 1; i
< nLineLength
; ++i
)
697 // Count the delta vs the previous line.
698 aFilteredLine
[i
] = aOrigLine
[i
] - aPrevLine
[i
];
699 // Remember the new reference.
700 aPrevLine
[i
] = aOrigLine
[i
];
703 aXRefStream
.WriteBytes(aFilteredLine
.data(), aFilteredLine
.size());
706 m_aEditBuffer
.WriteUInt32AsString(nXRefStreamId
);
707 m_aEditBuffer
.WriteCharPtr(
708 " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
711 auto pID
= dynamic_cast<PDFArrayElement
*>(m_pXRefStream
->Lookup("ID"));
714 const std::vector
<PDFElement
*>& rElements
= pID
->GetElements();
715 m_aEditBuffer
.WriteCharPtr("/ID [ <");
716 for (size_t i
= 0; i
< rElements
.size(); ++i
)
718 auto pIDString
= dynamic_cast<PDFHexStringElement
*>(rElements
[i
]);
722 m_aEditBuffer
.WriteOString(pIDString
->GetValue());
723 if ((i
+ 1) < rElements
.size())
724 m_aEditBuffer
.WriteCharPtr("> <");
726 m_aEditBuffer
.WriteCharPtr("> ] ");
730 m_aEditBuffer
.WriteCharPtr("/Index [ ");
731 for (const auto& rXRef
: m_aXRef
)
733 if (!rXRef
.second
.GetDirty())
736 m_aEditBuffer
.WriteUInt32AsString(rXRef
.first
);
737 m_aEditBuffer
.WriteCharPtr(" 1 ");
739 m_aEditBuffer
.WriteCharPtr("] ");
742 auto pInfo
= dynamic_cast<PDFReferenceElement
*>(m_pXRefStream
->Lookup("Info"));
745 m_aEditBuffer
.WriteCharPtr("/Info ");
746 m_aEditBuffer
.WriteUInt32AsString(pInfo
->GetObjectValue());
747 m_aEditBuffer
.WriteCharPtr(" ");
748 m_aEditBuffer
.WriteUInt32AsString(pInfo
->GetGenerationValue());
749 m_aEditBuffer
.WriteCharPtr(" R ");
753 m_aEditBuffer
.WriteCharPtr("/Length ");
756 aZCodec
.BeginCompression();
758 SvMemoryStream aStream
;
759 aZCodec
.Compress(aXRefStream
, aStream
);
760 aZCodec
.EndCompression();
762 aXRefStream
.SetStreamSize(0);
764 aXRefStream
.WriteStream(aStream
);
766 m_aEditBuffer
.WriteUInt32AsString(aXRefStream
.GetSize());
768 if (!m_aStartXRefs
.empty())
770 // Write location of the previous cross-reference section.
771 m_aEditBuffer
.WriteCharPtr("/Prev ");
772 m_aEditBuffer
.WriteUInt32AsString(m_aStartXRefs
.back());
776 m_aEditBuffer
.WriteCharPtr("/Root ");
777 m_aEditBuffer
.WriteUInt32AsString(pRoot
->GetObjectValue());
778 m_aEditBuffer
.WriteCharPtr(" ");
779 m_aEditBuffer
.WriteUInt32AsString(pRoot
->GetGenerationValue());
780 m_aEditBuffer
.WriteCharPtr(" R ");
783 m_aEditBuffer
.WriteCharPtr("/Size ");
784 m_aEditBuffer
.WriteUInt32AsString(m_aXRef
.size());
786 m_aEditBuffer
.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
788 m_aEditBuffer
.WriteStream(aXRefStream
);
789 m_aEditBuffer
.WriteCharPtr("\nendstream\nendobj\n\n");
793 // Write the xref table.
794 m_aEditBuffer
.WriteCharPtr("xref\n");
795 for (const auto& rXRef
: m_aXRef
)
797 size_t nObject
= rXRef
.first
;
798 size_t nOffset
= rXRef
.second
.GetOffset();
799 if (!rXRef
.second
.GetDirty())
802 m_aEditBuffer
.WriteUInt32AsString(nObject
);
803 m_aEditBuffer
.WriteCharPtr(" 1\n");
804 OStringBuffer aBuffer
;
805 aBuffer
.append(static_cast<sal_Int32
>(nOffset
));
806 while (aBuffer
.getLength() < 10)
807 aBuffer
.insert(0, "0");
809 aBuffer
.append(" 65535 f \n");
811 aBuffer
.append(" 00000 n \n");
812 m_aEditBuffer
.WriteOString(aBuffer
.toString());
815 // Write the trailer.
816 m_aEditBuffer
.WriteCharPtr("trailer\n<</Size ");
817 m_aEditBuffer
.WriteUInt32AsString(m_aXRef
.size());
818 m_aEditBuffer
.WriteCharPtr("/Root ");
819 m_aEditBuffer
.WriteUInt32AsString(pRoot
->GetObjectValue());
820 m_aEditBuffer
.WriteCharPtr(" ");
821 m_aEditBuffer
.WriteUInt32AsString(pRoot
->GetGenerationValue());
822 m_aEditBuffer
.WriteCharPtr(" R\n");
823 auto pInfo
= dynamic_cast<PDFReferenceElement
*>(m_pTrailer
->Lookup("Info"));
826 m_aEditBuffer
.WriteCharPtr("/Info ");
827 m_aEditBuffer
.WriteUInt32AsString(pInfo
->GetObjectValue());
828 m_aEditBuffer
.WriteCharPtr(" ");
829 m_aEditBuffer
.WriteUInt32AsString(pInfo
->GetGenerationValue());
830 m_aEditBuffer
.WriteCharPtr(" R\n");
832 auto pID
= dynamic_cast<PDFArrayElement
*>(m_pTrailer
->Lookup("ID"));
835 const std::vector
<PDFElement
*>& rElements
= pID
->GetElements();
836 m_aEditBuffer
.WriteCharPtr("/ID [ <");
837 for (size_t i
= 0; i
< rElements
.size(); ++i
)
839 auto pIDString
= dynamic_cast<PDFHexStringElement
*>(rElements
[i
]);
843 m_aEditBuffer
.WriteOString(pIDString
->GetValue());
844 if ((i
+ 1) < rElements
.size())
845 m_aEditBuffer
.WriteCharPtr(">\n<");
847 m_aEditBuffer
.WriteCharPtr("> ]\n");
850 if (!m_aStartXRefs
.empty())
852 // Write location of the previous cross-reference section.
853 m_aEditBuffer
.WriteCharPtr("/Prev ");
854 m_aEditBuffer
.WriteUInt32AsString(m_aStartXRefs
.back());
857 m_aEditBuffer
.WriteCharPtr(">>\n");
861 bool PDFDocument::Sign(const uno::Reference
<security::XCertificate
>& xCertificate
,
862 const OUString
& rDescription
, bool bAdES
)
864 m_aEditBuffer
.Seek(STREAM_SEEK_TO_END
);
865 m_aEditBuffer
.WriteCharPtr("\n");
867 sal_uInt64 nSignatureLastByteRangeOffset
= 0;
868 sal_Int64 nSignatureContentOffset
= 0;
869 sal_Int32 nSignatureId
= WriteSignatureObject(
870 rDescription
, bAdES
, nSignatureLastByteRangeOffset
, nSignatureContentOffset
);
872 tools::Rectangle aSignatureRectangle
;
873 sal_Int32 nAppearanceId
= WriteAppearanceObject(aSignatureRectangle
);
875 std::vector
<PDFObjectElement
*> aPages
= GetPages();
878 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
883 if (m_nSignaturePage
< aPages
.size())
885 nPage
= m_nSignaturePage
;
889 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to find page #" << nPage
);
893 PDFObjectElement
& rPage
= *aPages
[nPage
];
894 sal_Int32 nAnnotId
= WriteAnnotObject(rPage
, nSignatureId
, nAppearanceId
, aSignatureRectangle
);
896 if (!WritePageObject(rPage
, nAnnotId
))
898 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
902 PDFReferenceElement
* pRoot
= nullptr;
903 if (!WriteCatalogObject(nAnnotId
, pRoot
))
905 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
909 sal_uInt64 nXRefOffset
= m_aEditBuffer
.Tell();
910 WriteXRef(nXRefOffset
, pRoot
);
913 m_aEditBuffer
.WriteCharPtr("startxref\n");
914 m_aEditBuffer
.WriteUInt32AsString(nXRefOffset
);
915 m_aEditBuffer
.WriteCharPtr("\n%%EOF\n");
917 // Finalize the signature, now that we know the total file size.
918 // Calculate the length of the last byte range.
919 sal_uInt64 nFileEnd
= m_aEditBuffer
.Tell();
920 sal_Int64 nLastByteRangeLength
921 = nFileEnd
- (nSignatureContentOffset
+ MAX_SIGNATURE_CONTENT_LENGTH
+ 1);
922 // Write the length to the buffer.
923 m_aEditBuffer
.Seek(nSignatureLastByteRangeOffset
);
924 OString aByteRangeBuffer
= OString::number(nLastByteRangeLength
) + " ]";
925 m_aEditBuffer
.WriteOString(aByteRangeBuffer
);
927 // Create the PKCS#7 object.
928 css::uno::Sequence
<sal_Int8
> aDerEncoded
= xCertificate
->getEncoded();
929 if (!aDerEncoded
.hasElements())
931 SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
935 m_aEditBuffer
.Seek(0);
936 sal_uInt64 nBufferSize1
= nSignatureContentOffset
- 1;
937 std::unique_ptr
<char[]> aBuffer1(new char[nBufferSize1
]);
938 m_aEditBuffer
.ReadBytes(aBuffer1
.get(), nBufferSize1
);
940 m_aEditBuffer
.Seek(nSignatureContentOffset
+ MAX_SIGNATURE_CONTENT_LENGTH
+ 1);
941 sal_uInt64 nBufferSize2
= nLastByteRangeLength
;
942 std::unique_ptr
<char[]> aBuffer2(new char[nBufferSize2
]);
943 m_aEditBuffer
.ReadBytes(aBuffer2
.get(), nBufferSize2
);
945 OStringBuffer aCMSHexBuffer
;
946 svl::crypto::Signing
aSigning(xCertificate
);
947 aSigning
.AddDataRange(aBuffer1
.get(), nBufferSize1
);
948 aSigning
.AddDataRange(aBuffer2
.get(), nBufferSize2
);
949 if (!aSigning
.Sign(aCMSHexBuffer
))
951 SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
955 assert(aCMSHexBuffer
.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH
);
957 m_aEditBuffer
.Seek(nSignatureContentOffset
);
958 m_aEditBuffer
.WriteOString(aCMSHexBuffer
.toString());
963 bool PDFDocument::Write(SvStream
& rStream
)
965 m_aEditBuffer
.Seek(0);
966 rStream
.WriteStream(m_aEditBuffer
);
967 return rStream
.good();
970 bool PDFDocument::Tokenize(SvStream
& rStream
, TokenizeMode eMode
,
971 std::vector
<std::unique_ptr
<PDFElement
>>& rElements
,
972 PDFObjectElement
* pObjectElement
)
974 // Last seen object token.
975 PDFObjectElement
* pObject
= pObjectElement
;
976 PDFNameElement
* pObjectKey
= nullptr;
977 PDFObjectElement
* pObjectStream
= nullptr;
978 bool bInXRef
= false;
979 // The next number will be an xref offset.
980 bool bInStartXRef
= false;
981 // Dictionary depth, so we know when we're outside any dictionaries.
983 // Last seen array token that's outside any dictionaries.
984 PDFArrayElement
* pArray
= nullptr;
985 // If we're inside an obj/endobj pair.
986 bool bInObject
= false;
991 rStream
.ReadChar(ch
);
999 auto pComment
= new PDFCommentElement(*this);
1000 rElements
.push_back(std::unique_ptr
<PDFElement
>(pComment
));
1001 rStream
.SeekRel(-1);
1002 if (!rElements
.back()->Read(rStream
))
1004 SAL_WARN("vcl.filter",
1005 "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
1008 if (eMode
== TokenizeMode::EOF_TOKEN
&& !m_aEOFs
.empty()
1009 && m_aEOFs
.back() == rStream
.Tell())
1011 // Found EOF and partial parsing requested, we're done.
1018 // Dictionary or hex string.
1019 rStream
.ReadChar(ch
);
1020 rStream
.SeekRel(-2);
1023 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFDictionaryElement()));
1027 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFHexStringElement
));
1028 if (!rElements
.back()->Read(rStream
))
1030 SAL_WARN("vcl.filter",
1031 "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
1038 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFEndDictionaryElement()));
1040 rStream
.SeekRel(-1);
1041 if (!rElements
.back()->Read(rStream
))
1043 SAL_WARN("vcl.filter",
1044 "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
1051 auto pArr
= new PDFArrayElement(pObject
);
1052 rElements
.push_back(std::unique_ptr
<PDFElement
>(pArr
));
1055 // The array is attached directly, inform the object.
1059 pObject
->SetArray(pArray
);
1060 pObject
->SetArrayOffset(rStream
.Tell());
1064 rStream
.SeekRel(-1);
1065 if (!rElements
.back()->Read(rStream
))
1067 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
1074 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFEndArrayElement()));
1076 rStream
.SeekRel(-1);
1081 pObject
->SetArrayLength(rStream
.Tell() - pObject
->GetArrayOffset());
1084 if (!rElements
.back()->Read(rStream
))
1086 SAL_WARN("vcl.filter",
1087 "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1094 auto pNameElement
= new PDFNameElement();
1095 rElements
.push_back(std::unique_ptr
<PDFElement
>(pNameElement
));
1096 rStream
.SeekRel(-1);
1097 if (!pNameElement
->Read(rStream
))
1099 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1103 if (pObject
&& pObjectKey
&& pObjectKey
->GetValue() == "Type"
1104 && pNameElement
->GetValue() == "ObjStm")
1105 pObjectStream
= pObject
;
1107 pObjectKey
= pNameElement
;
1112 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFLiteralStringElement
));
1113 rStream
.SeekRel(-1);
1114 if (!rElements
.back()->Read(rStream
))
1116 SAL_WARN("vcl.filter",
1117 "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1124 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch
)) || ch
== '-' || ch
== '+'
1127 // Numbering object: an integer or a real.
1128 auto pNumberElement
= new PDFNumberElement();
1129 rElements
.push_back(std::unique_ptr
<PDFElement
>(pNumberElement
));
1130 rStream
.SeekRel(-1);
1131 if (!pNumberElement
->Read(rStream
))
1133 SAL_WARN("vcl.filter",
1134 "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1139 bInStartXRef
= false;
1140 m_aStartXRefs
.push_back(pNumberElement
->GetValue());
1142 auto it
= m_aOffsetObjects
.find(pNumberElement
->GetValue());
1143 if (it
!= m_aOffsetObjects
.end())
1144 m_pXRefStream
= it
->second
;
1146 else if (bInObject
&& !nDepth
&& pObject
)
1147 // Number element inside an object, but outside a
1148 // dictionary / array: remember it.
1149 pObject
->SetNumberElement(pNumberElement
);
1151 else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch
)))
1153 // Possible keyword, like "obj".
1154 rStream
.SeekRel(-1);
1155 OString aKeyword
= ReadKeyword(rStream
);
1157 bool bObj
= aKeyword
== "obj";
1158 if (bObj
|| aKeyword
== "R")
1160 size_t nElements
= rElements
.size();
1163 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1164 "tokens before 'obj' or 'R' keyword");
1169 = dynamic_cast<PDFNumberElement
*>(rElements
[nElements
- 2].get());
1170 auto pGenerationNumber
1171 = dynamic_cast<PDFNumberElement
*>(rElements
[nElements
- 1].get());
1172 if (!pObjectNumber
|| !pGenerationNumber
)
1174 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1175 "generation number before 'obj' or 'R' keyword");
1181 pObject
= new PDFObjectElement(*this, pObjectNumber
->GetValue(),
1182 pGenerationNumber
->GetValue());
1183 rElements
.push_back(std::unique_ptr
<PDFElement
>(pObject
));
1184 m_aOffsetObjects
[pObjectNumber
->GetLocation()] = pObject
;
1185 m_aIDObjects
[pObjectNumber
->GetValue()] = pObject
;
1190 auto pReference
= new PDFReferenceElement(*this, *pObjectNumber
,
1191 *pGenerationNumber
);
1192 rElements
.push_back(std::unique_ptr
<PDFElement
>(pReference
));
1193 if (bInObject
&& nDepth
> 0 && pObject
)
1194 // Inform the object about a new in-dictionary reference.
1195 pObject
->AddDictionaryReference(pReference
);
1197 if (!rElements
.back()->Read(rStream
))
1199 SAL_WARN("vcl.filter",
1200 "PDFDocument::Tokenize: PDFElement::Read() failed");
1204 else if (aKeyword
== "stream")
1206 // Look up the length of the stream from the parent object's dictionary.
1208 for (size_t nElement
= 0; nElement
< rElements
.size(); ++nElement
)
1210 // Iterate in reverse order.
1211 size_t nIndex
= rElements
.size() - nElement
- 1;
1212 PDFElement
* pElement
= rElements
[nIndex
].get();
1213 auto pObj
= dynamic_cast<PDFObjectElement
*>(pElement
);
1217 PDFElement
* pLookup
= pObj
->Lookup("Length");
1218 auto pReference
= dynamic_cast<PDFReferenceElement
*>(pLookup
);
1221 // Length is provided as a reference.
1222 nLength
= pReference
->LookupNumber(rStream
);
1226 auto pNumber
= dynamic_cast<PDFNumberElement
*>(pLookup
);
1229 // Length is provided directly.
1230 nLength
= pNumber
->GetValue();
1236 "PDFDocument::Tokenize: found no Length key for stream keyword");
1240 PDFDocument::SkipLineBreaks(rStream
);
1241 auto pStreamElement
= new PDFStreamElement(nLength
);
1243 pObject
->SetStream(pStreamElement
);
1244 rElements
.push_back(std::unique_ptr
<PDFElement
>(pStreamElement
));
1245 if (!rElements
.back()->Read(rStream
))
1247 SAL_WARN("vcl.filter",
1248 "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1252 else if (aKeyword
== "endstream")
1254 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFEndStreamElement
));
1255 if (!rElements
.back()->Read(rStream
))
1257 SAL_WARN("vcl.filter",
1258 "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1262 else if (aKeyword
== "endobj")
1264 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFEndObjectElement
));
1265 if (!rElements
.back()->Read(rStream
))
1267 SAL_WARN("vcl.filter",
1268 "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1271 if (eMode
== TokenizeMode::END_OF_OBJECT
)
1273 // Found endobj and only object parsing was requested, we're done.
1279 // We're at the end of an object stream, parse the stored objects.
1280 pObjectStream
->ParseStoredObjects();
1281 pObjectStream
= nullptr;
1282 pObjectKey
= nullptr;
1286 else if (aKeyword
== "true" || aKeyword
== "false")
1287 rElements
.push_back(std::unique_ptr
<PDFElement
>(
1288 new PDFBooleanElement(aKeyword
.toBoolean())));
1289 else if (aKeyword
== "null")
1290 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFNullElement
));
1291 else if (aKeyword
== "xref")
1292 // Allow 'f' and 'n' keywords.
1294 else if (bInXRef
&& (aKeyword
== "f" || aKeyword
== "n"))
1297 else if (aKeyword
== "trailer")
1299 auto pTrailer
= new PDFTrailerElement(*this);
1301 // Make it possible to find this trailer later by offset.
1302 pTrailer
->Read(rStream
);
1303 m_aOffsetTrailers
[pTrailer
->GetLocation()] = pTrailer
;
1305 // When reading till the first EOF token only, remember
1306 // just the first trailer token.
1307 if (eMode
!= TokenizeMode::EOF_TOKEN
|| !m_pTrailer
)
1308 m_pTrailer
= pTrailer
;
1309 rElements
.push_back(std::unique_ptr
<PDFElement
>(pTrailer
));
1311 else if (aKeyword
== "startxref")
1313 bInStartXRef
= true;
1317 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1318 << aKeyword
<< "' keyword at byte position "
1325 auto uChar
= static_cast<unsigned char>(ch
);
1326 // Be more lenient and allow unexpected null char
1327 if (!rtl::isAsciiWhiteSpace(uChar
) && uChar
!= 0)
1329 SAL_WARN("vcl.filter",
1330 "PDFDocument::Tokenize: unexpected character with code "
1331 << sal_Int32(ch
) << " at byte position " << rStream
.Tell());
1334 SAL_WARN_IF(uChar
== 0, "vcl.filter",
1335 "PDFDocument::Tokenize: unexpected null character at "
1336 << rStream
.Tell() << " - ignoring");
/// Registers pObject under the object ID nID in m_aIDObjects; an element
/// already stored for that ID is overwritten by the assignment.
1346 void PDFDocument::SetIDObject(size_t nID
, PDFObjectElement
* pObject
)
1348 m_aIDObjects
[nID
] = pObject
;
/// Parses a PDF document from rStream.
///
/// Visible steps: verify the 5-byte "%PDF-" magic, copy the stream into
/// m_aEditBuffer, find the xref start offset with FindStartXRef(), read the
/// xref data found there, pick up the "Prev" offset from the trailer or the
/// xref stream, reset the parser state and finally tokenize with
/// TokenizeMode::END_OF_STREAM.
///
/// @param rStream Stream holding the document.
/// @return On the visible success path, the result of the final Tokenize().
///
/// NOTE(review): this listing omits short lines (braces, loop headers, early
/// returns), so the failure-path return values are not visible here.
1351 bool PDFDocument::Read(SvStream
& rStream
)
1353 // Check file magic.
1354 std::vector
<sal_Int8
> aHeader(5);
1356 rStream
.ReadBytes(aHeader
.data(), aHeader
.size());
1357 if (aHeader
[0] != '%' || aHeader
[1] != 'P' || aHeader
[2] != 'D' || aHeader
[3] != 'F'
1358 || aHeader
[4] != '-')
1360 SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1364 // Allow later editing of the contents in-memory.
1366 m_aEditBuffer
.WriteStream(rStream
);
1368 // Look up the offset of the xref table.
1369 size_t nStartXRef
= FindStartXRef(rStream
);
1370 SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef
);
// An offset of 0 is treated as "no startxref found".
1371 if (nStartXRef
== 0)
1373 SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
// Jump to the xref data. An empty keyword at that position is handled as an
// xref stream; otherwise the keyword is expected to be "xref".
1378 rStream
.Seek(nStartXRef
);
1379 OString aKeyword
= ReadKeyword(rStream
);
1380 if (aKeyword
.isEmpty())
1381 ReadXRefStream(rStream
);
1385 if (aKeyword
!= "xref")
1387 SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1391 if (!Tokenize(rStream
, TokenizeMode::EOF_TOKEN
, m_aElements
, nullptr))
// NOTE(review): "tokenizer" in the message below looks like a typo for
// "tokenize".
1393 SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1398 PDFNumberElement
* pPrev
= nullptr;
1401 pPrev
= dynamic_cast<PDFNumberElement
*>(m_pTrailer
->Lookup("Prev"));
1403 // Remember the offset of this trailer in the correct order. It's
1404 // possible that newer trailers don't have a larger offset.
1405 m_aTrailerOffsets
.push_back(m_pTrailer
->GetLocation());
1407 else if (m_pXRefStream
)
1408 pPrev
= dynamic_cast<PDFNumberElement
*>(m_pXRefStream
->Lookup("Prev"));
// Continue with the previous xref section.
// NOTE(review): the null check guarding pPrev is not visible in this
// listing -- confirm it exists, since both dynamic_casts above may yield
// nullptr.
1410 nStartXRef
= pPrev
->GetValue();
1412 // Reset state, except the edit buffer.
1413 m_aElements
.clear();
1414 m_aOffsetObjects
.clear();
1415 m_aIDObjects
.clear();
1416 m_aStartXRefs
.clear();
1418 m_pTrailer
= nullptr;
1419 m_pXRefStream
= nullptr;
1424 // Then we can tokenize the stream.
1426 return Tokenize(rStream
, TokenizeMode::END_OF_STREAM
, m_aElements
, nullptr);
/// Reads a run of ASCII letters from rStream and returns the content of aBuf
/// as the keyword.
///
/// On the final path the stream is moved back by one byte, so the character
/// that ended the run can be read again by the caller.
///
/// NOTE(review): the declaration of ch/aBuf, the append inside the loop and
/// the condition of the first return are not visible in this listing;
/// presumably the early return handles end of stream.
1429 OString
PDFDocument::ReadKeyword(SvStream
& rStream
)
1433 rStream
.ReadChar(ch
);
1436 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch
)))
1439 rStream
.ReadChar(ch
);
1441 return aBuf
.toString();
// Un-read the first non-letter character.
1443 rStream
.SeekRel(-1);
1444 return aBuf
.toString();
/// Finds the number that follows the last "startxref" keyword near the end
/// of rStream.
///
/// Only the last 1024 bytes are searched (or the whole stream when it is not
/// longer than that). After the keyword, whitespace is skipped and the
/// number is parsed with PDFNumberElement::Read().
///
/// @param rStream Stream to search; its position is changed.
/// @return The parsed number on the visible success path.
///
/// NOTE(review): the failure-path return values are not visible in this
/// listing; PDFDocument::Read() treats a result of 0 as "not found".
1447 size_t PDFDocument::FindStartXRef(SvStream
& rStream
)
1449 // Find the "startxref" token, somewhere near the end of the document.
1450 std::vector
<char> aBuf(1024);
1451 rStream
.Seek(STREAM_SEEK_TO_END
);
1452 if (rStream
.Tell() > aBuf
.size())
1453 rStream
.SeekRel(static_cast<sal_Int64
>(-1) * aBuf
.size());
1455 // The document is really short, then just read it from the start.
// Peek: read the tail into aBuf, then restore the position so the relative
// seek further down is measured from the start of the peeked window.
1457 size_t nBeforePeek
= rStream
.Tell();
1458 size_t nSize
= rStream
.ReadBytes(aBuf
.data(), aBuf
.size());
1459 rStream
.Seek(nBeforePeek
);
1460 if (nSize
!= aBuf
.size())
1462 OString
aPrefix("startxref");
1463 // Find the last startxref at the end of the document.
// aBuf.end() doubles as the "nothing found yet" marker for itLastValid.
1464 auto itLastValid
= aBuf
.end();
1465 auto it
= aBuf
.begin();
1468 it
= std::search(it
, aBuf
.end(), aPrefix
.getStr(), aPrefix
.getStr() + aPrefix
.getLength());
1469 if (it
== aBuf
.end())
1475 if (itLastValid
== aBuf
.end())
1477 SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
// Move to the first byte after the last "startxref" keyword.
1481 rStream
.SeekRel(itLastValid
- aBuf
.begin() + aPrefix
.getLength());
1484 SAL_WARN("vcl.filter",
1485 "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1489 PDFDocument::SkipWhitespace(rStream
);
1490 PDFNumberElement aNumber
;
1491 if (!aNumber
.Read(rStream
))
1493 return aNumber
.GetValue();
/// Reads a cross-reference stream object at the current position of rStream
/// and records its entries in m_aXRef.
///
/// Visible steps: tokenize one object (TokenizeMode::END_OF_OBJECT), locate
/// the PDFObjectElement and PDFStreamElement among m_aElements, read "Length"
/// bytes at the stream offset, decompress them with aZCodec, then use the
/// "Index"/"Size" and "W" keys to walk the entry rows.
///
/// A "Filter" other than "FlateDecode" is warned about. An index that is
/// already present in m_aXRef is left untouched.
///
/// @param rStream Stream positioned at the xref stream object.
///
/// NOTE(review): this listing omits short lines (braces, returns, some
/// declarations such as nColumns, nPredictor, aW, nType, nPos, nOffset and
/// aEntry), so the exact early-exit behaviour is not visible here.
1496 void PDFDocument::ReadXRefStream(SvStream
& rStream
)
1498 // Look up the stream length in the object dictionary.
1499 if (!Tokenize(rStream
, TokenizeMode::END_OF_OBJECT
, m_aElements
, nullptr))
1501 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1505 if (m_aElements
.empty())
1507 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1511 PDFObjectElement
* pObject
= nullptr;
1512 for (const auto& pElement
: m_aElements
)
1514 if (auto pObj
= dynamic_cast<PDFObjectElement
*>(pElement
.get()))
1522 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1526 // So that the Prev key can be looked up later.
1527 m_pXRefStream
= pObject
;
1529 PDFElement
* pLookup
= pObject
->Lookup("Length");
1530 auto pNumber
= dynamic_cast<PDFNumberElement
*>(pLookup
);
1533 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1536 sal_uInt64 nLength
= pNumber
->GetValue();
1538 // Look up the stream offset.
1539 PDFStreamElement
* pStream
= nullptr;
1540 for (const auto& pElement
: m_aElements
)
1542 if (auto pS
= dynamic_cast<PDFStreamElement
*>(pElement
.get()))
1550 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1554 // Read and decompress it.
1555 rStream
.Seek(pStream
->GetOffset());
// NOTE(review): nLength is taken from the file's "Length" key; no upper
// bound check is visible before this allocation -- confirm.
1556 std::vector
<char> aBuf(nLength
);
1557 rStream
.ReadBytes(aBuf
.data(), aBuf
.size());
1559 auto pFilter
= dynamic_cast<PDFNameElement
*>(pObject
->Lookup("Filter"));
1562 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1566 if (pFilter
->GetValue() != "FlateDecode")
1568 SAL_WARN("vcl.filter",
1569 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter
->GetValue());
// Optional "DecodeParms": pick up "Columns" and "Predictor" when they are
// numbers.
1575 if (auto pDecodeParams
= dynamic_cast<PDFDictionaryElement
*>(pObject
->Lookup("DecodeParms")))
1577 const std::map
<OString
, PDFElement
*>& rItems
= pDecodeParams
->GetItems();
1578 auto it
= rItems
.find("Columns");
1579 if (it
!= rItems
.end())
1580 if (auto pColumns
= dynamic_cast<PDFNumberElement
*>(it
->second
))
1581 nColumns
= pColumns
->GetValue();
1582 it
= rItems
.find("Predictor");
1583 if (it
!= rItems
.end())
1584 if (auto pPredictor
= dynamic_cast<PDFNumberElement
*>(it
->second
))
1585 nPredictor
= pPredictor
->GetValue();
// Decompress aBuf into aStream.
1588 SvMemoryStream
aSource(aBuf
.data(), aBuf
.size(), StreamMode::READ
);
1589 SvMemoryStream aStream
;
1591 aZCodec
.BeginCompression();
1592 aZCodec
.Decompress(aSource
, aStream
);
1593 if (!aZCodec
.EndCompression())
1595 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1599 // Look up the first and the last entry we need to read.
1600 auto pIndex
= dynamic_cast<PDFArrayElement
*>(pObject
->Lookup("Index"));
1601 std::vector
<size_t> aFirstObjects
;
1602 std::vector
<size_t> aNumberOfObjects
;
// Without "Index", a single subsection starting at object 0 with "Size"
// entries is used.
1605 auto pSize
= dynamic_cast<PDFNumberElement
*>(pObject
->Lookup("Size"));
1608 aFirstObjects
.push_back(0);
1609 aNumberOfObjects
.push_back(pSize
->GetValue());
1613 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
// With "Index", its elements are consumed as (first object, count) numbers.
1619 const std::vector
<PDFElement
*>& rIndexElements
= pIndex
->GetElements();
1620 size_t nFirstObject
= 0;
1621 for (size_t i
= 0; i
< rIndexElements
.size(); ++i
)
1625 auto pFirstObject
= dynamic_cast<PDFNumberElement
*>(rIndexElements
[i
]);
1628 SAL_WARN("vcl.filter",
1629 "PDFDocument::ReadXRefStream: Index has no first object");
1632 nFirstObject
= pFirstObject
->GetValue();
1636 auto pNumberOfObjects
= dynamic_cast<PDFNumberElement
*>(rIndexElements
[i
]);
1637 if (!pNumberOfObjects
)
1639 SAL_WARN("vcl.filter",
1640 "PDFDocument::ReadXRefStream: Index has no number of objects");
1643 aFirstObjects
.push_back(nFirstObject
);
1644 aNumberOfObjects
.push_back(pNumberOfObjects
->GetValue());
1648 // Look up the format of a single entry.
1649 const int nWSize
= 3;
1650 auto pW
= dynamic_cast<PDFArrayElement
*>(pObject
->Lookup("W"));
1651 if (!pW
|| pW
->GetElements().size() < nWSize
)
1653 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1657 // First character is the (kind of) repeated predictor.
1658 int nLineLength
= 1;
1659 for (size_t i
= 0; i
< nWSize
; ++i
)
1661 auto pI
= dynamic_cast<PDFNumberElement
*>(pW
->GetElements()[i
]);
1664 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
// NOTE(review): the field widths come from the file; no range check
// (negative or very large values) is visible in this listing -- confirm.
1667 aW
[i
] = pI
->GetValue();
1668 nLineLength
+= aW
[i
];
// With a predictor, the row payload (line length minus the leading byte)
// has to match "Columns".
1671 if (nPredictor
> 1 && nLineLength
- 1 != nColumns
)
1673 SAL_WARN("vcl.filter",
1674 "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1679 for (size_t nSubSection
= 0; nSubSection
< aFirstObjects
.size(); ++nSubSection
)
1681 size_t nFirstObject
= aFirstObjects
[nSubSection
];
1682 size_t nNumberOfObjects
= aNumberOfObjects
[nSubSection
];
1684 // This is the line as read from the stream.
1685 std::vector
<unsigned char> aOrigLine(nLineLength
);
1686 // This is the line as it appears after tweaking according to nPredictor.
1687 std::vector
<unsigned char> aFilteredLine(nLineLength
);
1688 for (size_t nEntry
= 0; nEntry
< nNumberOfObjects
; ++nEntry
)
1690 size_t nIndex
= nFirstObject
+ nEntry
;
1692 aStream
.ReadBytes(aOrigLine
.data(), aOrigLine
.size());
// The first byte of each row plus 10 is expected to equal nPredictor.
1693 if (nPredictor
> 1 && aOrigLine
[0] + 10 != nPredictor
)
1695 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1696 "inconsistent with /DecodeParms/Predictor for object #"
1701 for (int i
= 0; i
< nLineLength
; ++i
)
1709 // PNG prediction: up (on all rows).
1710 aFilteredLine
[i
] = aFilteredLine
[i
] + aOrigLine
[i
];
1713 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1719 // First character is already handled above.
1722 // Start of the current field in the stream data.
// Each of the three fields is accumulated as a big-endian integer.
1724 for (; nPos
< nOffset
+ aW
[0]; ++nPos
)
1726 unsigned char nCh
= aFilteredLine
[nPos
];
1727 nType
= (nType
<< 8) + nCh
;
1730 // Start of the object in the file stream.
1731 size_t nStreamOffset
= 0;
1733 for (; nPos
< nOffset
+ aW
[1]; ++nPos
)
1735 unsigned char nCh
= aFilteredLine
[nPos
];
1736 nStreamOffset
= (nStreamOffset
<< 8) + nCh
;
1739 // Generation number of the object.
1740 size_t nGenerationNumber
= 0;
1742 for (; nPos
< nOffset
+ aW
[2]; ++nPos
)
1744 unsigned char nCh
= aFilteredLine
[nPos
];
1745 nGenerationNumber
= (nGenerationNumber
<< 8) + nCh
;
1748 // Ignore invalid nType.
// Only record the entry when this index has not been seen yet.
1751 if (m_aXRef
.find(nIndex
) == m_aXRef
.end())
1757 aEntry
.SetType(XRefEntryType::FREE
);
1760 aEntry
.SetType(XRefEntryType::NOT_COMPRESSED
);
1763 aEntry
.SetType(XRefEntryType::COMPRESSED
);
1766 aEntry
.SetOffset(nStreamOffset
);
1767 m_aXRef
[nIndex
] = aEntry
;
/// Reads the subsections of a classic cross-reference table from rStream
/// into m_aXRef.
///
/// A subsection as visible here consists of a first object number and an
/// entry count (both checked to be >= 0), followed by that many entries of
/// offset, generation number and an "f" or "n" keyword. When the subsection
/// header does not parse as a number, the token is taken to be the trailer.
///
/// @param rStream Stream positioned right after the "xref" keyword
///                (presumably -- the caller reads that keyword first).
///
/// NOTE(review): loop headers, returns and the declaration of aEntry are not
/// visible in this listing.
1774 void PDFDocument::ReadXRef(SvStream
& rStream
)
1776 PDFDocument::SkipWhitespace(rStream
);
1780 PDFNumberElement aFirstObject
;
1781 if (!aFirstObject
.Read(rStream
))
1783 // Next token is not a number, it'll be the trailer.
1787 if (aFirstObject
.GetValue() < 0)
1789 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1793 PDFDocument::SkipWhitespace(rStream
);
1794 PDFNumberElement aNumberOfEntries
;
1795 if (!aNumberOfEntries
.Read(rStream
))
1797 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1801 if (aNumberOfEntries
.GetValue() < 0)
1803 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1807 size_t nSize
= aNumberOfEntries
.GetValue();
1808 for (size_t nEntry
= 0; nEntry
< nSize
; ++nEntry
)
// Object number of this entry: first object of the subsection plus the
// position of the entry within it.
1810 size_t nIndex
= aFirstObject
.GetValue() + nEntry
;
1811 PDFDocument::SkipWhitespace(rStream
);
1812 PDFNumberElement aOffset
;
1813 if (!aOffset
.Read(rStream
))
1815 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1819 PDFDocument::SkipWhitespace(rStream
);
1820 PDFNumberElement aGenerationNumber
;
1821 if (!aGenerationNumber
.Read(rStream
))
1823 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1827 PDFDocument::SkipWhitespace(rStream
);
1828 OString aKeyword
= ReadKeyword(rStream
);
1829 if (aKeyword
!= "f" && aKeyword
!= "n")
1831 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1834 // xrefs are read in reverse order, so never update an existing
1835 // offset with an older one.
1836 if (m_aXRef
.find(nIndex
) == m_aXRef
.end())
1839 aEntry
.SetOffset(aOffset
.GetValue());
1840 // Initially only the first entry is dirty.
// NOTE(review): the condition guarding SetDirty(true) is not visible in
// this listing.
1842 aEntry
.SetDirty(true);
1843 m_aXRef
[nIndex
] = aEntry
;
1845 PDFDocument::SkipWhitespace(rStream
);
/// Advances rStream past ASCII whitespace.
///
/// When a character that is not ASCII whitespace is read, the stream is
/// moved back by one byte so that character can be read again.
///
/// NOTE(review): the loop header and the declaration of ch are not visible
/// in this listing.
1850 void PDFDocument::SkipWhitespace(SvStream
& rStream
)
1856 rStream
.ReadChar(ch
);
1860 if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch
)))
1862 rStream
.SeekRel(-1);
/// Advances rStream past line-break characters ('\n' and '\r').
///
/// When a character that is neither '\n' nor '\r' is read, the stream is
/// moved back by one byte so that character can be read again.
///
/// NOTE(review): the loop header and the declaration of ch are not visible
/// in this listing.
1868 void PDFDocument::SkipLineBreaks(SvStream
& rStream
)
1874 rStream
.ReadChar(ch
);
1878 if (ch
!= '\n' && ch
!= '\r')
1880 rStream
.SeekRel(-1);
/// Returns the offset recorded in m_aXRef for object index nIndex.
///
/// A warning is logged when the index is unknown or its entry has type
/// XRefEntryType::COMPRESSED.
///
/// NOTE(review): the value returned after the warning is not visible in this
/// listing.
1886 size_t PDFDocument::GetObjectOffset(size_t nIndex
) const
1888 auto it
= m_aXRef
.find(nIndex
);
1889 if (it
== m_aXRef
.end() || it
->second
.GetType() == XRefEntryType::COMPRESSED
)
1891 SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1892 << nIndex
<< ", but failed");
1896 return it
->second
.GetOffset();
/// Read-only access to a vector of owned PDFElement instances.
///
/// NOTE(review): the body is not visible in this listing; presumably it
/// returns the m_aElements member that Read() tokenizes into -- confirm.
1899 const std::vector
<std::unique_ptr
<PDFElement
>>& PDFDocument::GetElements() const
1904 /// Visits the page tree recursively, looking for page objects.
///
/// The "Kids" array of pPages is walked. A kid whose "Type" is "Pages" is
/// visited recursively, any other kid object is appended to rRet. pPages is
/// flagged via setVisiting(true) for the duration of the walk, and a kid
/// that reports alreadyVisiting() is logged as a loop in the hierarchy.
///
/// @param pPages Page tree node to walk.
/// @param rRet   Receives the page objects found.
///
/// NOTE(review): null checks and the continue/return statements are not
/// visible in this listing.
1905 static void visitPages(PDFObjectElement
* pPages
, std::vector
<PDFObjectElement
*>& rRet
)
1907 auto pKids
= dynamic_cast<PDFArrayElement
*>(pPages
->Lookup("Kids"));
1910 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1914 pPages
->setVisiting(true);
1916 for (const auto& pKid
: pKids
->GetElements())
1918 auto pReference
= dynamic_cast<PDFReferenceElement
*>(pKid
);
1922 PDFObjectElement
* pKidObject
= pReference
->LookupObject();
1926 // detect if visiting reenters itself
1927 if (pKidObject
->alreadyVisiting())
1929 SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1933 auto pName
= dynamic_cast<PDFNameElement
*>(pKidObject
->Lookup("Type"));
1934 if (pName
&& pName
->GetValue() == "Pages")
1935 // Pages inside pages: recurse.
1936 visitPages(pKidObject
, rRet
);
1938 // Found an actual page.
1939 rRet
.push_back(pKidObject
);
1942 pPages
->setVisiting(false);
/// Returns the object referenced by the "Root" key.
///
/// The key is looked up on the trailer registered in m_aOffsetTrailers for
/// m_aTrailerOffsets[0]; the xref stream (m_pXRefStream) is the alternative
/// source. A warning is logged when no "Root" reference is found.
///
/// NOTE(review): the conditions selecting the trailer branch and the value
/// returned after the warning are not visible in this listing.
1945 PDFObjectElement
* PDFDocument::GetCatalog()
1947 PDFReferenceElement
* pRoot
= nullptr;
1949 PDFTrailerElement
* pTrailer
= nullptr;
1950 if (!m_aTrailerOffsets
.empty())
1952 // Get access to the latest trailer, and work with the keys of that one.
1954 auto it
= m_aOffsetTrailers
.find(m_aTrailerOffsets
[0]);
1955 if (it
!= m_aOffsetTrailers
.end())
1956 pTrailer
= it
->second
;
1960 pRoot
= dynamic_cast<PDFReferenceElement
*>(pTrailer
->Lookup("Root"));
1961 else if (m_pXRefStream
)
1962 pRoot
= dynamic_cast<PDFReferenceElement
*>(m_pXRefStream
->Lookup("Root"));
1966 SAL_WARN("vcl.filter", "PDFDocument::GetCatalog: trailer has no Root key");
1970 return pRoot
->LookupObject();
/// Collects the page objects of the document.
///
/// The catalog from GetCatalog() is asked for its "Pages" object, which is
/// then walked with visitPages() to fill aRet. Warnings are logged when the
/// catalog or its pages object is missing.
///
/// NOTE(review): the return statements are not visible in this listing;
/// presumably aRet is returned on every path.
1973 std::vector
<PDFObjectElement
*> PDFDocument::GetPages()
1975 std::vector
<PDFObjectElement
*> aRet
;
1977 PDFObjectElement
* pCatalog
= GetCatalog();
1980 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1984 PDFObjectElement
* pPages
= pCatalog
->LookupObject("Pages");
1987 SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog
->GetObjectValue()
1988 << ") has no pages");
1992 visitPages(pPages
, aRet
);
1997 void PDFDocument::PushBackEOF(size_t nOffset
) { m_aEOFs
.push_back(nOffset
); }
/// Collects the annotation objects whose "FT" name is "Sig".
///
/// For every page from GetPages(), the "Annots" entry is used either as a
/// direct array or, when it is a reference, as the array of the referenced
/// object. Each array element that is a reference is resolved and kept when
/// its "FT" value is "Sig".
///
/// NOTE(review): null checks, continue statements and the final return are
/// not visible in this listing; presumably aRet is returned.
1999 std::vector
<PDFObjectElement
*> PDFDocument::GetSignatureWidgets()
2001 std::vector
<PDFObjectElement
*> aRet
;
2003 std::vector
<PDFObjectElement
*> aPages
= GetPages();
2005 for (const auto& pPage
: aPages
)
2010 PDFElement
* pAnnotsElement
= pPage
->Lookup("Annots");
2011 auto pAnnots
= dynamic_cast<PDFArrayElement
*>(pAnnotsElement
);
2014 // Annots is not an array, see if it's a reference to an object
2015 // with a direct array.
2016 auto pAnnotsRef
= dynamic_cast<PDFReferenceElement
*>(pAnnotsElement
);
2019 if (PDFObjectElement
* pAnnotsObject
= pAnnotsRef
->LookupObject())
2021 pAnnots
= pAnnotsObject
->GetArray();
2029 for (const auto& pAnnot
: pAnnots
->GetElements())
2031 auto pReference
= dynamic_cast<PDFReferenceElement
*>(pAnnot
);
2035 PDFObjectElement
* pAnnotObject
= pReference
->LookupObject();
2039 auto pFT
= dynamic_cast<PDFNameElement
*>(pAnnotObject
->Lookup("FT"));
2040 if (!pFT
|| pFT
->GetValue() != "Sig")
2043 aRet
.push_back(pAnnotObject
);
/// Decodes the value of pElement by forwarding it to
/// svl::crypto::DecodeHexString().
///
/// @param pElement Hex string element; dereferenced without a null check.
/// @return The bytes produced by svl::crypto::DecodeHexString().
2050 std::vector
<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement
const* pElement
)
2052 return svl::crypto::DecodeHexString(pElement
->GetValue());
/// Decodes rElement as a UTF-16BE text string with a leading FE FF byte
/// order mark.
///
/// The decoded bytes must be at least two, start with 0xFE 0xFF and be even
/// in number; the bytes after the mark are combined pairwise (high byte
/// first) into sal_Unicode values.
///
/// @return The decoded string on the visible success path.
///
/// NOTE(review): the value returned when the check fails is not visible in
/// this listing.
2055 OUString
PDFDocument::DecodeHexStringUTF16BE(PDFHexStringElement
const& rElement
)
2057 std::vector
<unsigned char> const encoded(DecodeHexString(&rElement
));
2058 // Text strings can be PDF-DocEncoding or UTF-16BE with mandatory BOM;
2059 // only the latter is supported here
2060 if (encoded
.size() < 2 || encoded
[0] != 0xFE || encoded
[1] != 0xFF || (encoded
.size() & 1) != 0)
// The buffer is constructed with the payload size in bytes, which is twice
// the number of UTF-16 units appended below.
2064 OUStringBuffer
buf(static_cast<unsigned int>(encoded
.size() - 2));
2065 for (size_t i
= 2; i
< encoded
.size(); i
+= 2)
2067 buf
.append(sal_Unicode((static_cast<sal_uInt16
>(encoded
[i
]) << 8) | encoded
[i
+ 1]));
2069 return buf
.makeStringAndClear();
/// Constructs a comment element for the document rDoc.
///
/// NOTE(review): the member-initializer list and body are not visible in
/// this listing; PDFCommentElement::Read() uses m_rDoc, which is presumably
/// bound to rDoc here.
2072 PDFCommentElement::PDFCommentElement(PDFDocument
& rDoc
)
/// Reads one comment from rStream into m_aComment.
///
/// Reading stops at '\n', '\r' or end of stream. When the comment starts
/// with "%%EOF", a stream position taken after it is reported to the
/// document through PushBackEOF().
///
/// NOTE(review): the loop header, the buffer append, the adjustment of nPos
/// for a following '\n' and the return statements are not visible in this
/// listing.
2077 bool PDFCommentElement::Read(SvStream
& rStream
)
2079 // Read from (including) the % char till (excluding) the end of the line/stream.
2082 rStream
.ReadChar(ch
);
2085 if (ch
== '\n' || ch
== '\r' || rStream
.eof())
2087 m_aComment
= aBuf
.makeStringAndClear();
2089 if (m_aComment
.startsWith("%%EOF"))
2091 sal_uInt64 nPos
= rStream
.Tell();
// Peek at the next character without consuming it.
2094 rStream
.ReadChar(ch
);
2095 rStream
.SeekRel(-1);
2096 // If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat
2103 m_rDoc
.PushBackEOF(nPos
);
2106 SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment
<< "'");
2110 rStream
.ReadChar(ch
);
/// Compiler-generated default constructor.
2116 PDFNumberElement::PDFNumberElement() = default;
/// Reads a numeric token from rStream.
///
/// The start position is stored in m_nOffset. The first character has to be
/// a digit, '-', '+' or '.'; otherwise the stream is moved back by one byte.
/// When the token ends, the stream is moved back by one byte, m_nLength is
/// set to the distance from m_nOffset and the collected text is converted
/// with toDouble() into m_fValue.
///
/// NOTE(review): the buffer append, the remainder of the in-loop condition
/// and the return statements are not visible in this listing.
2118 bool PDFNumberElement::Read(SvStream
& rStream
)
2121 m_nOffset
= rStream
.Tell();
2123 rStream
.ReadChar(ch
);
2128 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch
)) && ch
!= '-' && ch
!= '+' && ch
!= '.')
2130 rStream
.SeekRel(-1);
2133 while (!rStream
.eof())
2135 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch
)) && ch
!= '-' && ch
!= '+'
// End of the token: un-read the terminating character and record the result.
2138 rStream
.SeekRel(-1);
2139 m_nLength
= rStream
.Tell() - m_nOffset
;
2140 m_fValue
= aBuf
.makeStringAndClear().toDouble();
2141 SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue
<< "'");
2145 rStream
.ReadChar(ch
);
2151 sal_uInt64
PDFNumberElement::GetLocation() const { return m_nOffset
; }
2153 sal_uInt64
PDFNumberElement::GetLength() const { return m_nLength
; }
2155 bool PDFBooleanElement::Read(SvStream
& /*rStream*/) { return true; }
2157 bool PDFNullElement::Read(SvStream
& /*rStream*/) { return true; }
/// Reads a hex string token from rStream into m_aValue.
///
/// The first character is expected to be '<' (see the log message below);
/// the following characters are read until end of stream or until the
/// collected buffer is stored in m_aValue.
///
/// NOTE(review): the first-character check, the terminating condition, the
/// buffer append and the return statements are not visible in this listing.
2159 bool PDFHexStringElement::Read(SvStream
& rStream
)
2162 rStream
.ReadChar(ch
);
2165 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2168 rStream
.ReadChar(ch
);
2171 while (!rStream
.eof())
2175 m_aValue
= aBuf
.makeStringAndClear();
2176 SAL_INFO("vcl.filter",
2177 "PDFHexStringElement::Read: m_aValue length is " << m_aValue
.getLength());
2181 rStream
.ReadChar(ch
);
2187 const OString
& PDFHexStringElement::GetValue() const { return m_aValue
; }
/// Reads a literal string token from rStream into m_aValue.
///
/// The first character is expected to be '('. Parentheses not preceded by a
/// backslash are tracked as nesting levels, and the collected buffer is
/// stored in m_aValue when the ')' of the outermost '(' is reached.
///
/// NOTE(review): the nesting counter, the buffer append, the update of
/// nPrevCh and the return statements are not visible in this listing.
2189 bool PDFLiteralStringElement::Read(SvStream
& rStream
)
2193 rStream
.ReadChar(ch
);
// NOTE(review): the message below names PDFHexStringElement::Read although
// this is PDFLiteralStringElement::Read -- looks like a copy/paste slip.
2196 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2200 rStream
.ReadChar(ch
);
2202 // Start with 1 nesting level as we read a '(' above already.
2205 while (!rStream
.eof())
2207 if (ch
== '(' && nPrevCh
!= '\\')
2210 if (ch
== ')' && nPrevCh
!= '\\')
2215 // ')' of the outermost '(' is reached.
2216 m_aValue
= aBuf
.makeStringAndClear();
2217 SAL_INFO("vcl.filter",
2218 "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue
<< "'");
2223 rStream
.ReadChar(ch
);
2229 const OString
& PDFLiteralStringElement::GetValue() const { return m_aValue
; }
/// Constructs a trailer element for rDoc; m_pDictionaryElement starts out as
/// nullptr.
///
/// NOTE(review): the first member initializer is not visible in this
/// listing; presumably it binds m_rDoc to rDoc.
2231 PDFTrailerElement::PDFTrailerElement(PDFDocument
& rDoc
)
2233 , m_pDictionaryElement(nullptr)
/// Records the current position of rStream in m_nOffset (reported later by
/// GetLocation()).
///
/// NOTE(review): the return statement is not visible in this listing.
2237 bool PDFTrailerElement::Read(SvStream
& rStream
)
2239 m_nOffset
= rStream
.Tell();
/// Looks up rDictionaryKey in the trailer dictionary.
///
/// When m_pDictionaryElement is not set yet, a PDFObjectParser over the
/// document's elements is run on this trailer first; the second check
/// handles the case that it is still unset afterwards.
///
/// NOTE(review): the value returned when the dictionary is still missing is
/// not visible in this listing.
2243 PDFElement
* PDFTrailerElement::Lookup(const OString
& rDictionaryKey
)
2245 if (!m_pDictionaryElement
)
2247 PDFObjectParser
aParser(m_rDoc
.GetElements());
2248 aParser
.parse(this);
2250 if (!m_pDictionaryElement
)
2252 return m_pDictionaryElement
->LookupElement(rDictionaryKey
);
2255 sal_uInt64
PDFTrailerElement::GetLocation() const { return m_nOffset
; }
2257 double PDFNumberElement::GetValue() const { return m_fValue
; }
/// Constructs an object element with the given object and generation values.
///
/// The visible initializers zero the dictionary offset/length and set the
/// number, dictionary, array and stream element pointers to nullptr.
///
/// @param rDoc             Document this object belongs to.
/// @param fObjectValue     Stored in m_fObjectValue.
/// @param fGenerationValue Stored in m_fGenerationValue.
///
/// NOTE(review): some initializers (and the body) are not visible in this
/// listing.
2259 PDFObjectElement::PDFObjectElement(PDFDocument
& rDoc
, double fObjectValue
, double fGenerationValue
)
2261 , m_fObjectValue(fObjectValue
)
2262 , m_fGenerationValue(fGenerationValue
)
2263 , m_pNumberElement(nullptr)
2264 , m_nDictionaryOffset(0)
2265 , m_nDictionaryLength(0)
2266 , m_pDictionaryElement(nullptr)
2269 , m_pArrayElement(nullptr)
2270 , m_pStreamElement(nullptr)
/// Logs the object and generation values; the stream parameter is unused.
///
/// NOTE(review): the return statement is not visible in this listing.
2275 bool PDFObjectElement::Read(SvStream
& /*rStream*/)
2277 SAL_INFO("vcl.filter",
2278 "PDFObjectElement::Read: " << m_fObjectValue
<< " " << m_fGenerationValue
<< " obj");
/// Compiler-generated default constructor.
2282 PDFDictionaryElement::PDFDictionaryElement() = default;
/// Searches rDictionary for rKey.
///
/// @param rDictionary Map from key to element.
/// @param rKey        Key to find.
///
/// NOTE(review): both return statements are not visible in this listing;
/// presumably nullptr is returned for a missing key and the mapped element
/// otherwise.
2284 PDFElement
* PDFDictionaryElement::Lookup(const std::map
<OString
, PDFElement
*>& rDictionary
,
2285 const OString
& rKey
)
2287 auto it
= rDictionary
.find(rKey
);
2288 if (it
== rDictionary
.end())
/// Resolves the reference stored under rDictionaryKey in m_aItems to its
/// object.
///
/// A warning is logged when the key does not hold a reference.
///
/// NOTE(review): the null check and the value returned after the warning are
/// not visible in this listing.
2294 PDFObjectElement
* PDFDictionaryElement::LookupObject(const OString
& rDictionaryKey
)
2296 auto pKey
= dynamic_cast<PDFReferenceElement
*>(
2297 PDFDictionaryElement::Lookup(m_aItems
, rDictionaryKey
));
2300 SAL_WARN("vcl.filter",
2301 "PDFDictionaryElement::LookupObject: no such key with reference value: "
2306 return pKey
->LookupObject();
/// Looks up rDictionaryKey in this dictionary's own items (m_aItems) via the
/// static Lookup() helper.
2309 PDFElement
* PDFDictionaryElement::LookupElement(const OString
& rDictionaryKey
)
2311 return PDFDictionaryElement::Lookup(m_aItems
, rDictionaryKey
);
/// Runs a PDFObjectParser on this object.
///
/// When m_aElements is non-empty the parser works on those elements (stored
/// object in an object stream); otherwise it works on the elements of the
/// owning document.
///
/// NOTE(review): the guard that decides whether parsing is necessary is not
/// visible in this listing.
2314 void PDFObjectElement::parseIfNecessary()
2318 if (!m_aElements
.empty())
2320 // This is a stored object in an object stream.
2321 PDFObjectParser
aParser(m_aElements
);
2322 aParser
.parse(this);
2326 // Normal object: elements are stored as members of the document itself.
2327 PDFObjectParser
aParser(m_rDoc
.GetElements());
2328 aParser
.parse(this);
/// Looks up rDictionaryKey in this object's dictionary items.
///
/// NOTE(review): the statements before the null check and the value returned
/// when m_pDictionaryElement is unset are not visible in this listing.
2334 PDFElement
* PDFObjectElement::Lookup(const OString
& rDictionaryKey
)
2337 if (!m_pDictionaryElement
)
2339 return PDFDictionaryElement::Lookup(GetDictionaryItems(), rDictionaryKey
);
/// Resolves the reference stored under rDictionaryKey in this object's
/// dictionary to its object.
///
/// A warning is logged when the key does not hold a reference.
///
/// NOTE(review): the null check and the value returned after the warning are
/// not visible in this listing.
2342 PDFObjectElement
* PDFObjectElement::LookupObject(const OString
& rDictionaryKey
)
2344 auto pKey
= dynamic_cast<PDFReferenceElement
*>(Lookup(rDictionaryKey
));
2347 SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2352 return pKey
->LookupObject();
2355 double PDFObjectElement::GetObjectValue() const { return m_fObjectValue
; }
/// Stores nDictionaryOffset in m_nDictionaryOffset.
2357 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset
)
2359 m_nDictionaryOffset
= nDictionaryOffset
;
/// Returns m_nDictionaryOffset.
///
/// NOTE(review): judging by the embedded line numbers, one short statement
/// before the return is not visible in this listing; the function is
/// non-const, so it may trigger parsing first -- confirm.
2362 sal_uInt64
PDFObjectElement::GetDictionaryOffset()
2365 return m_nDictionaryOffset
;
2368 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset
) { m_nArrayOffset
= nArrayOffset
; }
2370 sal_uInt64
PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset
; }
/// Records nOffset for rKey in m_aDictionaryKeyOffset, overwriting a value
/// stored earlier for the same key.
2372 void PDFDictionaryElement::SetKeyOffset(const OString
& rKey
, sal_uInt64 nOffset
)
2374 m_aDictionaryKeyOffset
[rKey
] = nOffset
;
/// Records nLength for rKey in m_aDictionaryKeyValueLength, overwriting a
/// value stored earlier for the same key.
2377 void PDFDictionaryElement::SetKeyValueLength(const OString
& rKey
, sal_uInt64 nLength
)
2379 m_aDictionaryKeyValueLength
[rKey
] = nLength
;
/// Looks up the offset recorded for rKey in m_aDictionaryKeyOffset.
///
/// NOTE(review): both return statements are not visible in this listing, so
/// the value reported for an unknown key cannot be stated here.
2382 sal_uInt64
PDFDictionaryElement::GetKeyOffset(const OString
& rKey
) const
2384 auto it
= m_aDictionaryKeyOffset
.find(rKey
);
2385 if (it
== m_aDictionaryKeyOffset
.end())
/// Looks up the length recorded for rKey in m_aDictionaryKeyValueLength.
///
/// NOTE(review): both return statements are not visible in this listing, so
/// the value reported for an unknown key cannot be stated here.
2391 sal_uInt64
PDFDictionaryElement::GetKeyValueLength(const OString
& rKey
) const
2393 auto it
= m_aDictionaryKeyValueLength
.find(rKey
);
2394 if (it
== m_aDictionaryKeyValueLength
.end())
2400 const std::map
<OString
, PDFElement
*>& PDFDictionaryElement::GetItems() const { return m_aItems
; }
/// Stores nDictionaryLength in m_nDictionaryLength.
2402 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength
)
2404 m_nDictionaryLength
= nDictionaryLength
;
/// Returns m_nDictionaryLength.
///
/// NOTE(review): judging by the embedded line numbers, one short statement
/// before the return is not visible in this listing; the function is
/// non-const, so it may trigger parsing first -- confirm.
2407 sal_uInt64
PDFObjectElement::GetDictionaryLength()
2410 return m_nDictionaryLength
;
2413 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength
) { m_nArrayLength
= nArrayLength
; }
2415 sal_uInt64
PDFObjectElement::GetArrayLength() const { return m_nArrayLength
; }
/// Returns m_pDictionaryElement.
///
/// NOTE(review): judging by the embedded line numbers, one short statement
/// before the return is not visible in this listing; the function is
/// non-const, so it may trigger parsing first -- confirm.
2417 PDFDictionaryElement
* PDFObjectElement::GetDictionary()
2420 return m_pDictionaryElement
;
/// Stores pDictionaryElement in m_pDictionaryElement.
2423 void PDFObjectElement::SetDictionary(PDFDictionaryElement
* pDictionaryElement
)
2425 m_pDictionaryElement
= pDictionaryElement
;
/// Stores pNumberElement in m_pNumberElement.
2428 void PDFObjectElement::SetNumberElement(PDFNumberElement
* pNumberElement
)
2430 m_pNumberElement
= pNumberElement
;
2433 PDFNumberElement
* PDFObjectElement::GetNumberElement() const { return m_pNumberElement
; }
/// Read-only access to the references collected through
/// AddDictionaryReference().
2435 const std::vector
<PDFReferenceElement
*>& PDFObjectElement::GetDictionaryReferences() const
2437 return m_aDictionaryReferences
;
/// Appends pReference to m_aDictionaryReferences.
2440 void PDFObjectElement::AddDictionaryReference(PDFReferenceElement
* pReference
)
2442 m_aDictionaryReferences
.push_back(pReference
);
/// Returns the items of m_pDictionaryElement.
///
/// m_pDictionaryElement is dereferenced without a visible null check.
///
/// NOTE(review): judging by the embedded line numbers, one short statement
/// before the return is not visible in this listing; the function is
/// non-const, so it may trigger parsing first -- confirm.
2445 const std::map
<OString
, PDFElement
*>& PDFObjectElement::GetDictionaryItems()
2448 return m_pDictionaryElement
->GetItems();
2451 void PDFObjectElement::SetArray(PDFArrayElement
* pArrayElement
) { m_pArrayElement
= pArrayElement
; }
2453 void PDFObjectElement::SetStream(PDFStreamElement
* pStreamElement
)
2455 m_pStreamElement
= pStreamElement
;
2458 PDFStreamElement
* PDFObjectElement::GetStream() const { return m_pStreamElement
; }
// Returns the array element attached to this object (m_pArrayElement).
// NOTE(review): the method is non-const and one statement before the return is
// not visible in this excerpt, so it may do more than read the member.
2460 PDFArrayElement
* PDFObjectElement::GetArray()
2463 return m_pArrayElement
;
// Unpacks the objects stored inside this object's stream. The visible steps are:
//  - require a stream element, a "Type" of "ObjStm" and a "Filter" of "FlateDecode";
//  - look up the "First", "N" and "Length" entries;
//  - read "Length" bytes from the document's edit buffer at the stream offset and
//    decompress them into aStream;
//  - read N (object number, byte offset) pairs and derive each object's length from
//    consecutive offsets;
//  - tokenize every stored object into its own PDFObjectElement, register it with
//    the document via SetIDObject() and keep a copy of its bytes as stream buffer.
// NOTE(review): several statements (null checks, early returns, the declaration of
// aZCodec, the resizing of aBuf in the second loop) are not visible in this excerpt.
// NOTE(review): the type/filter warnings below are tagged
// "PDFDocument::ReadXRefStream" although they are emitted from ParseStoredObjects —
// looks like a copy/paste leftover; confirm and correct the message text.
2466 void PDFObjectElement::ParseStoredObjects()
2468 if (!m_pStreamElement
)
2470 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2474 auto pType
= dynamic_cast<PDFNameElement
*>(Lookup("Type"));
2475 if (!pType
|| pType
->GetValue() != "ObjStm")
2478 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2480 SAL_WARN("vcl.filter",
2481 "PDFDocument::ReadXRefStream: unexpected type: " << pType
->GetValue());
2485 auto pFilter
= dynamic_cast<PDFNameElement
*>(Lookup("Filter"));
2486 if (!pFilter
|| pFilter
->GetValue() != "FlateDecode")
2489 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2491 SAL_WARN("vcl.filter",
2492 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter
->GetValue());
2496 auto pFirst
= dynamic_cast<PDFNumberElement
*>(Lookup("First"));
2499 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2503 auto pN
= dynamic_cast<PDFNumberElement
*>(Lookup("N"));
2506 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2509 size_t nN
= pN
->GetValue();
2511 auto pLength
= dynamic_cast<PDFNumberElement
*>(Lookup("Length"));
2514 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2517 size_t nLength
= pLength
->GetValue();
2519 // Read and decompress it.
// NOTE(review): nLength is taken from the file and used directly as the buffer size.
2520 SvMemoryStream
& rEditBuffer
= m_rDoc
.GetEditBuffer();
2521 rEditBuffer
.Seek(m_pStreamElement
->GetOffset());
2522 std::vector
<char> aBuf(nLength
);
2523 rEditBuffer
.ReadBytes(aBuf
.data(), aBuf
.size());
2524 SvMemoryStream
aSource(aBuf
.data(), aBuf
.size(), StreamMode::READ
);
2525 SvMemoryStream aStream
;
2527 aZCodec
.BeginCompression();
2528 aZCodec
.Decompress(aSource
, aStream
);
2529 if (!aZCodec
.EndCompression())
2531 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
// From here on nLength is the size of the decompressed data.
2535 nLength
= aStream
.TellEnd();
2537 std::vector
<size_t> aObjNums
;
2538 std::vector
<size_t> aOffsets
;
2539 std::vector
<size_t> aLengths
;
2540 // First iterate over and find out the lengths.
2541 for (size_t nObject
= 0; nObject
< nN
; ++nObject
)
2543 PDFNumberElement aObjNum
;
2544 if (!aObjNum
.Read(aStream
))
2546 SAL_WARN("vcl.filter",
2547 "PDFObjectElement::ParseStoredObjects: failed to read object number");
2550 aObjNums
.push_back(aObjNum
.GetValue());
2552 PDFDocument::SkipWhitespace(aStream
);
2554 PDFNumberElement aByteOffset
;
2555 if (!aByteOffset
.Read(aStream
))
2557 SAL_WARN("vcl.filter",
2558 "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
// Offsets in the table are relative to "First"; store them as positions in aStream.
2561 aOffsets
.push_back(pFirst
->GetValue() + aByteOffset
.GetValue());
// The length of the previous object is the distance to this offset; the last object
// extends to the end of the decompressed data.
// NOTE(review): these are unsigned size_t subtractions on values read from the
// file; presumably they wrap around for non-increasing offsets — verify.
2563 if (aOffsets
.size() > 1)
2564 aLengths
.push_back(aOffsets
.back() - aOffsets
[aOffsets
.size() - 2]);
2565 if (nObject
+ 1 == nN
)
2566 aLengths
.push_back(nLength
- aOffsets
.back());
2568 PDFDocument::SkipWhitespace(aStream
);
2571 // Now create streams with the proper length and tokenize the data.
2572 for (size_t nObject
= 0; nObject
< nN
; ++nObject
)
2574 size_t nObjNum
= aObjNums
[nObject
];
2575 size_t nOffset
= aOffsets
[nObject
];
2576 size_t nLen
= aLengths
[nObject
];
2578 aStream
.Seek(nOffset
);
2579 m_aStoredElements
.push_back(std::make_unique
<PDFObjectElement
>(m_rDoc
, nObjNum
, 0));
2580 PDFObjectElement
* pStored
= m_aStoredElements
.back().get();
2584 aStream
.ReadBytes(aBuf
.data(), aBuf
.size());
2585 SvMemoryStream
aStoredStream(aBuf
.data(), aBuf
.size(), StreamMode::READ
);
2587 m_rDoc
.Tokenize(aStoredStream
, TokenizeMode::STORED_OBJECT
, pStored
->GetStoredElements(),
2589 // This is how references know the object is stored inside this object stream.
2590 m_rDoc
.SetIDObject(nObjNum
, pStored
);
2592 // Store the stream of the object in the object stream for later use.
2593 std::unique_ptr
<SvMemoryStream
> pStreamBuffer(new SvMemoryStream());
2594 aStoredStream
.Seek(0);
2595 pStreamBuffer
->WriteStream(aStoredStream
);
2596 pStored
->SetStreamBuffer(pStreamBuffer
);
// Returns a mutable reference to a vector of owned PDFElement instances;
// ParseStoredObjects() passes it to Tokenize() as the output container.
// NOTE(review): the body is not visible in this excerpt, so which member is
// returned cannot be confirmed from here.
2600 std::vector
<std::unique_ptr
<PDFElement
>>& PDFObjectElement::GetStoredElements()
2605 SvMemoryStream
* PDFObjectElement::GetStreamBuffer() const { return m_pStreamBuffer
.get(); }
2607 void PDFObjectElement::SetStreamBuffer(std::unique_ptr
<SvMemoryStream
>& pStreamBuffer
)
2609 m_pStreamBuffer
= std::move(pStreamBuffer
);
2612 PDFDocument
& PDFObjectElement::GetDocument() { return m_rDoc
; }
// Builds a reference from the object and generation number tokens: the numeric
// values are copied out of rObject / rGeneration, and rObject itself is kept by
// reference in m_rObject.
// NOTE(review): the first member initializer (presumably the one consuming rDoc)
// and the constructor body are not visible in this excerpt.
2614 PDFReferenceElement::PDFReferenceElement(PDFDocument
& rDoc
, PDFNumberElement
& rObject
,
2615 PDFNumberElement
const& rGeneration
)
2617 , m_fObjectValue(rObject
.GetValue())
2618 , m_fGenerationValue(rGeneration
.GetValue())
2619 , m_rObject(rObject
)
2623 PDFNumberElement
& PDFReferenceElement::GetObjectElement() const { return m_rObject
; }
// Logs the reference as "<object> <generation> R" and records the current stream
// position in m_nOffset. No bytes are consumed by the visible statements.
// NOTE(review): the return statement is not visible in this excerpt.
2625 bool PDFReferenceElement::Read(SvStream
& rStream
)
2627 SAL_INFO("vcl.filter",
2628 "PDFReferenceElement::Read: " << m_fObjectValue
<< " " << m_fGenerationValue
<< " R");
2629 m_nOffset
= rStream
.Tell();
2633 sal_uInt64
PDFReferenceElement::GetOffset() const { return m_nOffset
; }
// Resolves this reference to a plain number stored in the referenced object.
// The visible steps are: ask the document for the object's offset, seek there,
// then read and verify "<object number> <generation> obj" and finally read the
// number that follows, whose value is returned. The original stream position is
// restored on every exit path by the scope guard.
// NOTE(review): the checks of nOffset, the early returns after each warning and
// the braces scoping the three aNumber variables are not visible in this excerpt.
2635 double PDFReferenceElement::LookupNumber(SvStream
& rStream
) const
2637 size_t nOffset
= m_rDoc
.GetObjectOffset(m_fObjectValue
);
2640 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
// Remember where the caller was and go back there when leaving the function.
2645 sal_uInt64 nOrigPos
= rStream
.Tell();
2646 comphelper::ScopeGuard
g([&]() { rStream
.Seek(nOrigPos
); });
2648 rStream
.Seek(nOffset
);
// Object number at the target offset must match this reference.
2650 PDFDocument::SkipWhitespace(rStream
);
2651 PDFNumberElement aNumber
;
2652 bool bRet
= aNumber
.Read(rStream
);
2653 if (!bRet
|| aNumber
.GetValue() != m_fObjectValue
)
2655 SAL_WARN("vcl.filter",
2656 "PDFReferenceElement::LookupNumber: offset points to not matching object");
// Generation number must match as well.
2662 PDFDocument::SkipWhitespace(rStream
);
2663 PDFNumberElement aNumber
;
2664 bool bRet
= aNumber
.Read(rStream
);
2665 if (!bRet
|| aNumber
.GetValue() != m_fGenerationValue
)
2667 SAL_WARN("vcl.filter",
2668 "PDFReferenceElement::LookupNumber: offset points to not matching generation");
// Then the "obj" keyword is expected.
2674 PDFDocument::SkipWhitespace(rStream
);
2675 OString aKeyword
= PDFDocument::ReadKeyword(rStream
);
2676 if (aKeyword
!= "obj")
2678 SAL_WARN("vcl.filter",
2679 "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
// The object's content is expected to be a single number: that is the result.
2684 PDFDocument::SkipWhitespace(rStream
);
2685 PDFNumberElement aNumber
;
2686 if (!aNumber
.Read(rStream
))
2688 SAL_WARN("vcl.filter",
2689 "PDFReferenceElement::LookupNumber: failed to read referenced number");
2693 return aNumber
.GetValue();
2696 PDFObjectElement
* PDFReferenceElement::LookupObject()
2698 return m_rDoc
.LookupObject(m_fObjectValue
);
// Returns the object registered under nObjectNumber in m_aIDObjects. When no such
// entry exists a warning is logged.
// NOTE(review): the return statement of the "not found" path is not visible in
// this excerpt.
2701 PDFObjectElement
* PDFDocument::LookupObject(size_t nObjectNumber
)
2703 auto itIDObjects
= m_aIDObjects
.find(nObjectNumber
);
2705 if (itIDObjects
!= m_aIDObjects
.end())
2706 return itIDObjects
->second
;
2708 SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber
);
2712 SvMemoryStream
& PDFDocument::GetEditBuffer() { return m_aEditBuffer
; }
2714 int PDFReferenceElement::GetObjectValue() const { return m_fObjectValue
; }
2716 int PDFReferenceElement::GetGenerationValue() const { return m_fGenerationValue
; }
// Consumes the dictionary start token: two characters are read from rStream, with
// warnings for an unexpected character or a premature end of file, and the stream
// position after the token is recorded in m_nLocation. The log line shows the
// token is "<<".
// NOTE(review): the declaration of ch, the character comparisons and the return
// statements are not visible in this excerpt.
2718 bool PDFDictionaryElement::Read(SvStream
& rStream
)
2721 rStream
.ReadChar(ch
);
2724 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch
);
2730 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2734 rStream
.ReadChar(ch
);
2737 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch
);
2741 m_nLocation
= rStream
.Tell();
2743 SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
// Compiler-generated default constructor, defined out of line.
2748 PDFEndDictionaryElement::PDFEndDictionaryElement() = default;
2750 sal_uInt64
PDFEndDictionaryElement::GetLocation() const { return m_nLocation
; }
// Consumes the dictionary end token. Unlike PDFDictionaryElement::Read(), the
// stream position is recorded in m_nLocation *before* the two characters are read,
// so it points at the start of the token. The log line shows the token is ">>".
// NOTE(review): the declaration of ch, the character comparisons and the return
// statements are not visible in this excerpt.
2752 bool PDFEndDictionaryElement::Read(SvStream
& rStream
)
2754 m_nLocation
= rStream
.Tell();
2756 rStream
.ReadChar(ch
);
2759 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch
);
2765 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2769 rStream
.ReadChar(ch
);
2772 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch
);
2776 SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
// Compiler-generated default constructor, defined out of line.
2781 PDFNameElement::PDFNameElement() = default;
// Reads a name token. One leading character is read and checked (a warning is
// logged for an unexpected one), the position after it is stored in m_nLocation,
// and the following characters are read until white-space or one of the
// characters tested below is seen. That terminating character is pushed back with
// SeekRel(-1) and the collected text becomes m_aValue.
// NOTE(review): the terminator list below is / [ ] < > ( — other PDF delimiter
// characters such as ')' or '%' are not in it; confirm whether that is intended.
// NOTE(review): the declarations of ch and aBuf, the appending to aBuf and the
// return statements are not visible in this excerpt.
2783 bool PDFNameElement::Read(SvStream
& rStream
)
2786 rStream
.ReadChar(ch
);
2789 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch
);
2792 m_nLocation
= rStream
.Tell();
2796 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2800 // Read till the first white-space.
2802 rStream
.ReadChar(ch
);
2803 while (!rStream
.eof())
2805 if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch
)) || ch
== '/' || ch
== '['
2806 || ch
== ']' || ch
== '<' || ch
== '>' || ch
== '(')
// Un-read the terminator so the next token starts with it.
2808 rStream
.SeekRel(-1);
2809 m_aValue
= aBuf
.makeStringAndClear();
2810 SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue
<< "'");
2814 rStream
.ReadChar(ch
);
2820 const OString
& PDFNameElement::GetValue() const { return m_aValue
; }
2822 sal_uInt64
PDFNameElement::GetLocation() const { return m_nLocation
; }
// Creates a stream element that will read nLength bytes in Read().
// NOTE(review): any further member initializers and the constructor body are not
// visible in this excerpt.
2824 PDFStreamElement::PDFStreamElement(size_t nLength
)
2825 : m_nLength(nLength
)
2830 bool PDFStreamElement::Read(SvStream
& rStream
)
2832 SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength
);
2833 m_nOffset
= rStream
.Tell();
2834 std::vector
<unsigned char> aBytes(m_nLength
);
2835 rStream
.ReadBytes(aBytes
.data(), aBytes
.size());
2836 m_aMemory
.WriteBytes(aBytes
.data(), aBytes
.size());
2838 return rStream
.good();
2841 SvMemoryStream
& PDFStreamElement::GetMemory() { return m_aMemory
; }
2843 sal_uInt64
PDFStreamElement::GetOffset() const { return m_nOffset
; }
2845 bool PDFEndStreamElement::Read(SvStream
& /*rStream*/) { return true; }
2847 bool PDFEndObjectElement::Read(SvStream
& /*rStream*/) { return true; }
2849 PDFArrayElement::PDFArrayElement(PDFObjectElement
* pObject
)
2850 : m_pObject(pObject
)
// Consumes the array start token: one character is read from rStream and a
// warning is logged for an unexpected one. The log line shows the token is "[".
// NOTE(review): the declaration of ch, the character comparison and the return
// statements are not visible in this excerpt.
2854 bool PDFArrayElement::Read(SvStream
& rStream
)
2857 rStream
.ReadChar(ch
);
2860 SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch
);
2864 SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
// Appends pElement to the array's element list (m_aElements); only the pointer is
// stored. The owning object's number is logged for diagnostics.
// NOTE(review): m_pObject is dereferenced for the log line; a line before the
// SAL_INFO is not visible in this excerpt, presumably a null guard — confirm.
2869 void PDFArrayElement::PushBack(PDFElement
* pElement
)
2872 SAL_INFO("vcl.filter",
2873 "PDFArrayElement::PushBack: object is " << m_pObject
->GetObjectValue());
2874 m_aElements
.push_back(pElement
);
2877 const std::vector
<PDFElement
*>& PDFArrayElement::GetElements() const { return m_aElements
; }
// Compiler-generated default constructor, defined out of line.
2879 PDFEndArrayElement::PDFEndArrayElement() = default;
// Consumes the array end token. The stream position is recorded in m_nOffset
// before the character is read, so it points at the token itself. The log line
// shows the token is "]".
// NOTE(review): the declaration of ch, the character comparison and the return
// statements are not visible in this excerpt.
2881 bool PDFEndArrayElement::Read(SvStream
& rStream
)
2883 m_nOffset
= rStream
.Tell();
2885 rStream
.ReadChar(ch
);
2888 SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch
);
2892 SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
2897 sal_uInt64
PDFEndArrayElement::GetOffset() const { return m_nOffset
; }
// Walks the token list mrElements starting at nStartIndex and attaches the tokens
// found there to pParsingElement, which the casts below treat as an object, a
// trailer, a dictionary or an array.
//
// Visible behavior:
//  - pParsingElement is flagged with setParsing(true) for the duration of the call
//    (reset by the scope guard).
//  - Number tokens are collected in aNumbers and only attached to the enclosing
//    dictionary/array once a following, different token is seen, because a number
//    may turn out to be part of a reference.
//  - For dictionary entries the key offset and the key+value length are recorded
//    via SetKeyOffset() / SetKeyValueLength().
//  - Nested dictionaries and arrays are handled by recursive calls with
//    nCurrentDepth + 1; the loop index is then advanced past the nested tokens.
//
// Returns nReturnIndex, described below as the index of the last parsed element.
//
// NOTE(review): many statements of this function (among them the declarations of
// aName and nDepth, several if-guards, the clearing of aName/aNumbers and the
// assignments to nReturnIndex) are not visible in this excerpt; the comments here
// only describe what can be seen.
2901 size_t PDFObjectParser::parse(PDFElement
* pParsingElement
, size_t nStartIndex
, int nCurrentDepth
)
2903 // The index of last parsed element
2904 size_t nReturnIndex
= 0;
2906 pParsingElement
->setParsing(true);
2908 comphelper::ScopeGuard
aGuard([pParsingElement
]() { pParsingElement
->setParsing(false); });
2910 // Current object, if root is an object, else nullptr
2911 auto pParsingObject
= dynamic_cast<PDFObjectElement
*>(pParsingElement
);
2912 auto pParsingTrailer
= dynamic_cast<PDFTrailerElement
*>(pParsingElement
);
2914 // Current dictionary, if root is a dictionary, else nullptr
2915 auto pParsingDictionary
= dynamic_cast<PDFDictionaryElement
*>(pParsingElement
);
2917 // Current parsing array, if root is an array, else nullptr
2918 auto pParsingArray
= dynamic_cast<PDFArrayElement
*>(pParsingElement
);
2920 // Find out where the dictionary for this object starts.
2921 size_t nIndex
= nStartIndex
;
2922 for (size_t i
= nStartIndex
; i
< mrElements
.size(); ++i
)
2924 if (mrElements
[i
].get() == pParsingElement
)
2932 sal_uInt64 nNameOffset
= 0;
2933 std::vector
<PDFNumberElement
*> aNumbers
;
2935 sal_uInt64 nDictionaryOffset
= 0;
2937 // Current depth; 1 is current
2940 for (size_t i
= nIndex
; i
< mrElements
.size(); ++i
)
2942 auto* pCurrentElement
= mrElements
[i
].get();
2944 // Dictionary tokens can be nested, track enter/leave.
2945 if (auto pCurrentDictionary
= dynamic_cast<PDFDictionaryElement
*>(pCurrentElement
))
2947 // Handle previously stored number
2948 if (!aNumbers
.empty())
2950 if (pParsingDictionary
)
2952 PDFNumberElement
* pNumber
= aNumbers
.back();
2954 = pNumber
->GetLocation() + pNumber
->GetLength() - nNameOffset
;
2956 pParsingDictionary
->insert(aName
, pNumber
);
2957 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
2958 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
2960 else if (pParsingArray
)
2962 for (auto& pNumber
: aNumbers
)
2963 pParsingArray
->PushBack(pNumber
);
2967 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
2975 if (nDepth
== 1) // pParsingDictionary is the current one
2977 // First dictionary start, track start offset.
2978 nDictionaryOffset
= pCurrentDictionary
->GetLocation();
2982 // Then the toplevel dictionary of the object.
2983 pParsingObject
->SetDictionary(pCurrentDictionary
);
2984 pParsingObject
->SetDictionaryOffset(nDictionaryOffset
);
2985 pParsingDictionary
= pCurrentDictionary
;
2987 else if (pParsingTrailer
)
2989 pParsingTrailer
->SetDictionary(pCurrentDictionary
);
2990 pParsingDictionary
= pCurrentDictionary
;
2993 else if (!pCurrentDictionary
->alreadyParsing())
2997 pParsingArray
->PushBack(pCurrentDictionary
);
2999 else if (pParsingDictionary
)
3001 // Dictionary toplevel value.
3002 pParsingDictionary
->insert(aName
, pCurrentDictionary
);
3006 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3008 // Nested dictionary.
3009 const size_t nNextElementIndex
= parse(pCurrentDictionary
, i
, nCurrentDepth
+ 1);
3010 i
= std::max(i
, nNextElementIndex
- 1);
3013 else if (auto pCurrentEndDictionary
3014 = dynamic_cast<PDFEndDictionaryElement
*>(pCurrentElement
))
3016 // Handle previously stored number
3017 if (!aNumbers
.empty())
3019 if (pParsingDictionary
)
3021 PDFNumberElement
* pNumber
= aNumbers
.back();
3023 = pNumber
->GetLocation() + pNumber
->GetLength() - nNameOffset
;
3025 pParsingDictionary
->insert(aName
, pNumber
);
3026 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3027 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3029 else if (pParsingArray
)
3031 for (auto& pNumber
: aNumbers
)
3032 pParsingArray
->PushBack(pNumber
);
3036 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3042 if (pParsingDictionary
)
3044 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3045 sal_uInt64 nLength
= pCurrentEndDictionary
->GetLocation() - nNameOffset
+ 2;
3046 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3050 if (nDepth
== 1) // has the parsing ended?
3052 // Last dictionary end, track length and stop parsing.
3055 sal_uInt64 nDictionaryLength
3056 = pCurrentEndDictionary
->GetLocation() - nDictionaryOffset
;
3057 pParsingObject
->SetDictionaryLength(nDictionaryLength
);
3065 else if (auto pCurrentArray
= dynamic_cast<PDFArrayElement
*>(pCurrentElement
))
3067 // Handle previously stored number
3068 if (!aNumbers
.empty())
3070 if (pParsingDictionary
)
3072 PDFNumberElement
* pNumber
= aNumbers
.back();
3075 = pNumber
->GetLocation() + pNumber
->GetLength() - nNameOffset
;
3076 pParsingDictionary
->insert(aName
, pNumber
);
3077 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3078 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3080 else if (pParsingArray
)
3082 for (auto& pNumber
: aNumbers
)
3083 pParsingArray
->PushBack(pNumber
);
3087 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3094 if (nDepth
== 1) // pParsingDictionary is the current one
3098 pParsingObject
->SetArray(pCurrentArray
);
3099 pParsingArray
= pCurrentArray
;
3102 else if (!pCurrentArray
->alreadyParsing())
3106 // Array is toplevel
3107 pParsingArray
->PushBack(pCurrentArray
);
3109 else if (pParsingDictionary
)
3111 // Dictionary toplevel value.
3112 pParsingDictionary
->insert(aName
, pCurrentArray
);
3115 const size_t nNextElementIndex
= parse(pCurrentArray
, i
, nCurrentDepth
+ 1);
3117 // ensure we go forwards and not endlessly loop
3118 i
= std::max(i
, nNextElementIndex
- 1);
3121 else if (auto pCurrentEndArray
= dynamic_cast<PDFEndArrayElement
*>(pCurrentElement
))
3123 // Handle previously stored number
3124 if (!aNumbers
.empty())
3126 if (pParsingDictionary
)
3128 PDFNumberElement
* pNumber
= aNumbers
.back();
3131 = pNumber
->GetLocation() + pNumber
->GetLength() - nNameOffset
;
3132 pParsingDictionary
->insert(aName
, pNumber
);
3133 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3134 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3136 else if (pParsingArray
)
3138 for (auto& pNumber
: aNumbers
)
3139 pParsingArray
->PushBack(pNumber
);
3143 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3149 if (nDepth
== 1) // has the parsing ended?
3151 // Last array end, track length and stop parsing.
3157 if (pParsingDictionary
)
3159 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3160 // Include the ending ']' in the length of the key - (array)value pair length.
3161 sal_uInt64 nLength
= pCurrentEndArray
->GetOffset() - nNameOffset
+ 1;
3162 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3168 else if (auto pCurrentName
= dynamic_cast<PDFNameElement
*>(pCurrentElement
))
3170 // Handle previously stored number
3171 if (!aNumbers
.empty())
3173 if (pParsingDictionary
)
3175 PDFNumberElement
* pNumber
= aNumbers
.back();
3178 = pNumber
->GetLocation() + pNumber
->GetLength() - nNameOffset
;
3179 pParsingDictionary
->insert(aName
, pNumber
);
3180 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3181 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3183 else if (pParsingArray
)
3185 for (auto& pNumber
: aNumbers
)
3186 pParsingArray
->PushBack(pNumber
);
3195 // if we are in an array, just push the name to array
3196 pParsingArray
->PushBack(pCurrentName
);
3198 else if (pParsingDictionary
)
3200 // if we are in a dictionary, we need to store the name as a possible key
3201 if (aName
.isEmpty())
3203 aName
= pCurrentName
->GetValue();
3204 nNameOffset
= pCurrentName
->GetLocation();
3208 sal_uInt64 nKeyLength
3209 = pCurrentName
->GetLocation() + pCurrentName
->GetLength() - nNameOffset
;
3210 pParsingDictionary
->insert(aName
, pCurrentName
);
3211 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3212 pParsingDictionary
->SetKeyValueLength(aName
, nKeyLength
);
3217 else if (auto pReference
= dynamic_cast<PDFReferenceElement
*>(pCurrentElement
))
3221 pParsingArray
->PushBack(pReference
);
3223 else if (pParsingDictionary
)
3225 sal_uInt64 nLength
= pReference
->GetOffset() - nNameOffset
;
3226 pParsingDictionary
->insert(aName
, pReference
);
3227 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3228 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3233 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3237 else if (auto pLiteralString
= dynamic_cast<PDFLiteralStringElement
*>(pCurrentElement
))
3241 pParsingArray
->PushBack(pLiteralString
);
3243 else if (pParsingDictionary
)
3245 pParsingDictionary
->insert(aName
, pLiteralString
);
3246 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3251 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3254 else if (auto pBoolean
= dynamic_cast<PDFBooleanElement
*>(pCurrentElement
))
3258 pParsingArray
->PushBack(pBoolean
);
3260 else if (pParsingDictionary
)
3262 pParsingDictionary
->insert(aName
, pBoolean
);
3263 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3268 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3271 else if (auto pHexString
= dynamic_cast<PDFHexStringElement
*>(pCurrentElement
))
3275 pParsingArray
->PushBack(pHexString
);
3277 else if (pParsingDictionary
)
3279 pParsingDictionary
->insert(aName
, pHexString
);
3280 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3284 else if (auto pNumberElement
= dynamic_cast<PDFNumberElement
*>(pCurrentElement
))
3286 // Just remember this, so that in case it's not a reference parameter,
3287 // we can handle it later.
3288 aNumbers
.push_back(pNumberElement
);
3290 else if (dynamic_cast<PDFEndObjectElement
*>(pCurrentElement
))
3292 // parsing of the object is finished
3295 else if (dynamic_cast<PDFObjectElement
*>(pCurrentElement
)
3296 || dynamic_cast<PDFTrailerElement
*>(pCurrentElement
))
3302 SAL_INFO("vcl.filter", "Unhandled element while parsing.");
3306 return nReturnIndex
;
3311 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */