1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include <vcl/filter/pdfdocument.hxx>
11 #include <pdf/pdfcompat.hxx>
17 #include <com/sun/star/uno/Sequence.hxx>
18 #include <com/sun/star/security/XCertificate.hpp>
20 #include <comphelper/scopeguard.hxx>
21 #include <comphelper/string.hxx>
22 #include <o3tl/string_view.hxx>
23 #include <rtl/character.hxx>
24 #include <rtl/strbuf.hxx>
25 #include <rtl/string.hxx>
26 #include <sal/log.hxx>
27 #include <sal/types.h>
28 #include <svl/cryptosign.hxx>
29 #include <tools/zcodec.hxx>
30 #include <vcl/pdfwriter.hxx>
31 #include <o3tl/safeint.hxx>
33 #include <pdf/objectcopier.hxx>
34 #include <pdf/COSWriter.hxx>
36 using namespace com::sun::star
;
// Special members of XRefEntry and PDFDocument: all three are explicitly defaulted here, so the
// compiler-generated implementations are used.
40 XRefEntry::XRefEntry() = default;
42 PDFDocument::PDFDocument() = default;
44 PDFDocument::~PDFDocument() = default;
46 bool PDFDocument::RemoveSignature(size_t nPosition
)
48 std::vector
<PDFObjectElement
*> aSignatures
= GetSignatureWidgets();
49 if (nPosition
>= aSignatures
.size())
51 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
55 if (aSignatures
.size() != m_aEOFs
.size() - 1)
57 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
58 "and incremental updates");
62 // The EOF offset is the end of the original file, without the signature at
64 m_aEditBuffer
.Seek(m_aEOFs
[nPosition
]);
65 // Drop all bytes after the current position.
66 m_aEditBuffer
.SetStreamSize(m_aEditBuffer
.Tell() + 1);
68 return m_aEditBuffer
.good();
71 sal_Int32
PDFDocument::createObject()
73 sal_Int32 nObject
= m_aXRef
.size();
74 m_aXRef
[nObject
] = XRefEntry();
78 bool PDFDocument::updateObject(sal_Int32 nObject
)
80 if (o3tl::make_unsigned(nObject
) >= m_aXRef
.size())
82 SAL_WARN("vcl.filter", "PDFDocument::updateObject: invalid nObject");
87 aEntry
.SetOffset(m_aEditBuffer
.Tell());
88 aEntry
.SetDirty(true);
89 m_aXRef
[nObject
] = aEntry
;
93 bool PDFDocument::writeBufferBytes(const void* pBuffer
, sal_uInt64 nBytes
)
95 std::size_t nWritten
= m_aEditBuffer
.WriteBytes(pBuffer
, nBytes
);
96 return nWritten
== nBytes
;
99 void PDFDocument::SetSignatureLine(std::vector
<sal_Int8
>&& rSignatureLine
)
101 m_aSignatureLine
= std::move(rSignatureLine
);
104 void PDFDocument::SetSignaturePage(size_t nPage
) { m_nSignaturePage
= nPage
; }
// Walks the widgets returned by GetSignatureWidgets() and inspects the "T" entry of each one.
// For entries whose value starts with "Signature", the remainder is parsed as an unsigned
// number and the largest such number is tracked in nRet.
// NOTE(review): the initialisation of nRet, the handling of non-matching widgets and the final
// return statement are not visible in this excerpt — presumably the result is derived from the
// maximum found; confirm against the full source.
106 sal_uInt32
PDFDocument::GetNextSignature()
109 for (const auto& pSignature
: GetSignatureWidgets())
// Only "T" values that are literal strings can be inspected; pT is nullptr otherwise.
111 auto pT
= dynamic_cast<PDFLiteralStringElement
*>(pSignature
->Lookup("T"_ostr
));
115 const OString
& rValue
= pT
->GetValue();
// On a match, startsWith() stores the part after the "Signature" prefix in rest.
116 std::string_view rest
;
117 if (!rValue
.startsWith("Signature", &rest
))
// Keep the highest numeric suffix seen so far.
120 nRet
= std::max(nRet
, o3tl::toUInt32(rest
));
// Appends a new signature object to m_aEditBuffer and registers it in m_aXRef as a dirty entry
// whose id is the number of xref entries at the time of the call.
//
// rSigningContext: passed on to vcl::PDFWriter::GetDateTime() for the /M entry.
// rDescription: written as /Reason via pdf::COSWriter when it is not empty.
// rLastByteRangeOffset (out): entry offset plus the buffered length at the point where the last
//     ByteRange value has to be filled in later; 100 spaces are reserved there.
// rContentOffset (out): entry offset plus the buffered length at the point where the
//     MAX_SIGNATURE_CONTENT_LENGTH '0' characters reserved for the signature start.
// NOTE(review): parts of the string literals, the condition selecting between the two
// SubFilter values (presumably bAdES) and the return statement are not visible in this excerpt.
126 sal_Int32
PDFDocument::WriteSignatureObject(svl::crypto::SigningContext
& rSigningContext
,
127 const OUString
& rDescription
, bool bAdES
,
128 sal_uInt64
& rLastByteRangeOffset
,
129 sal_Int64
& rContentOffset
)
131 // Write signature object.
132 sal_Int32 nSignatureId
= m_aXRef
.size();
133 XRefEntry aSignatureEntry
;
134 aSignatureEntry
.SetOffset(m_aEditBuffer
.Tell());
135 aSignatureEntry
.SetDirty(true);
136 m_aXRef
[nSignatureId
] = aSignatureEntry
;
// The object is first assembled in aSigBuffer so that offsets inside it can be computed from
// the buffer length; it is written to m_aEditBuffer in one go at the end.
138 OStringBuffer
aSigBuffer(OString::number(nSignatureId
)
141 rContentOffset
= aSignatureEntry
.GetOffset() + aSigBuffer
.getLength();
142 // Reserve space for the PKCS#7 object.
143 OStringBuffer
aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH
);
144 comphelper::string::padToLength(aContentFiller
, MAX_SIGNATURE_CONTENT_LENGTH
, '0');
145 aSigBuffer
.append(aContentFiller
+ ">\n/Type/Sig/SubFilter");
147 aSigBuffer
.append("/ETSI.CAdES.detached");
149 aSigBuffer
.append("/adbe.pkcs7.detached");
152 aSigBuffer
.append(" /M (" + vcl::PDFWriter::GetDateTime(&rSigningContext
)
155 // Byte range: we can write offset1-length1 and offset2 right now, will
156 // write length2 later.
158 // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
159 + OString::number(rContentOffset
- 1) + " "
160 + OString::number(rContentOffset
+ MAX_SIGNATURE_CONTENT_LENGTH
+ 1) + " ");
161 rLastByteRangeOffset
= aSignatureEntry
.GetOffset() + aSigBuffer
.getLength();
162 // We don't know how many bytes we need for the last ByteRange value, this
164 OStringBuffer aByteRangeFiller
;
165 comphelper::string::padToLength(aByteRangeFiller
, 100, ' ');
166 aSigBuffer
.append(aByteRangeFiller
167 // Finish the Sig obj.
168 + " /Filter/Adobe.PPKMS");
170 if (!rDescription
.isEmpty())
172 pdf::COSWriter aWriter
;
173 aWriter
.writeKeyAndUnicode("/Reason", rDescription
);
174 aSigBuffer
.append(aWriter
.getLine());
177 aSigBuffer
.append(" >>\nendobj\n\n");
178 m_aEditBuffer
.WriteOString(aSigBuffer
);
// Writes a form XObject (/Type/XObject /Subtype/Form) that serves as the signature appearance
// and returns its object id.
//
// When m_aSignatureLine is not empty it is parsed as a PDF document of its own. The MediaBox
// of the selected page provides the width and height stored into rSignatureRectangle, and the
// page's "Contents" object (if any) is copied into the new object's stream. m_aSignatureLine is
// cleared afterwards.
// The object is assembled in a local SvMemoryStream first; its xref entry gets the offset of
// m_aEditBuffer right before the assembled bytes are appended there.
// NOTE(review): several conditions and the early-return statements of the error paths are not
// visible in this excerpt, so the value returned on failure cannot be stated from here.
183 sal_Int32
PDFDocument::WriteAppearanceObject(tools::Rectangle
& rSignatureRectangle
)
185 PDFDocument aPDFDocument
;
186 filter::PDFObjectElement
* pPage
= nullptr;
187 std::vector
<filter::PDFObjectElement
*> aContentStreams
;
189 if (!m_aSignatureLine
.empty())
191 // Parse the PDF data of signature line: we can set the signature rectangle to non-empty
193 SvMemoryStream aPDFStream
;
194 aPDFStream
.WriteBytes(m_aSignatureLine
.data(), m_aSignatureLine
.size());
196 if (!aPDFDocument
.Read(aPDFStream
))
198 SAL_WARN("vcl.filter",
199 "PDFDocument::WriteAppearanceObject: failed to read the PDF document");
203 std::vector
<filter::PDFObjectElement
*> aPages
= aPDFDocument
.GetPages();
206 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no pages");
213 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no page");
217 // Calculate the bounding box.
218 PDFElement
* pMediaBox
= pPage
->Lookup("MediaBox"_ostr
);
219 auto pMediaBoxArray
= dynamic_cast<PDFArrayElement
*>(pMediaBox
);
220 if (!pMediaBoxArray
|| pMediaBoxArray
->GetElements().size() < 4)
222 SAL_WARN("vcl.filter",
223 "PDFDocument::WriteAppearanceObject: MediaBox is not an array of 4");
// Elements 2 and 3 of the MediaBox array are used as width and height.
226 const std::vector
<PDFElement
*>& rMediaBoxElements
= pMediaBoxArray
->GetElements();
227 auto pWidth
= dynamic_cast<PDFNumberElement
*>(rMediaBoxElements
[2]);
230 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no width");
233 rSignatureRectangle
.setWidth(pWidth
->GetValue());
234 auto pHeight
= dynamic_cast<PDFNumberElement
*>(rMediaBoxElements
[3]);
237 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no height");
240 rSignatureRectangle
.setHeight(pHeight
->GetValue());
242 if (PDFObjectElement
* pContentStream
= pPage
->LookupObject("Contents"_ostr
))
244 aContentStreams
.push_back(pContentStream
);
247 if (aContentStreams
.empty())
249 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no content stream");
253 m_aSignatureLine
.clear();
255 // Write appearance object: allocate an ID.
256 sal_Int32 nAppearanceId
= m_aXRef
.size();
257 m_aXRef
[nAppearanceId
] = XRefEntry();
259 // Write the object content.
260 SvMemoryStream aEditBuffer
;
261 aEditBuffer
.WriteNumberAsString(nAppearanceId
);
262 aEditBuffer
.WriteOString(" 0 obj\n");
263 aEditBuffer
.WriteOString("<</Type/XObject\n/Subtype/Form\n");
// Resources of the source page are copied only when there is a content stream to go with them.
265 PDFObjectCopier
aCopier(*this);
266 if (!aContentStreams
.empty())
268 assert(pPage
&& "aContentStreams is only filled if there was a pPage");
269 OStringBuffer aBuffer
;
270 aCopier
.copyPageResources(pPage
, aBuffer
);
271 aEditBuffer
.WriteOString(aBuffer
);
274 aEditBuffer
.WriteOString("/BBox[0 0 ");
275 aEditBuffer
.WriteNumberAsString(rSignatureRectangle
.getOpenWidth());
276 aEditBuffer
.WriteOString(" ");
277 aEditBuffer
.WriteNumberAsString(rSignatureRectangle
.getOpenHeight());
278 aEditBuffer
.WriteOString("]\n/Length ");
280 // Add the object to the doc-level edit buffer and update the offset.
281 SvMemoryStream aStream
;
282 bool bCompressed
= false;
// nLength stays 0 when there is no content stream to copy.
283 sal_Int32 nLength
= 0;
284 if (!aContentStreams
.empty())
286 nLength
= PDFObjectCopier::copyPageStreams(aContentStreams
, aStream
, bCompressed
);
288 aEditBuffer
.WriteNumberAsString(nLength
);
// NOTE(review): the condition guarding this filter entry is not visible here — presumably
// bCompressed; confirm.
291 aEditBuffer
.WriteOString(" /Filter/FlateDecode");
294 aEditBuffer
.WriteOString("\n>>\n");
296 aEditBuffer
.WriteOString("stream\n");
298 // Copy the original page streams to the form XObject stream.
300 aEditBuffer
.WriteStream(aStream
);
302 aEditBuffer
.WriteOString("\nendstream\nendobj\n\n");
// Only now is the real xref entry created, so that it points at the position in m_aEditBuffer
// where the assembled object is about to be appended.
305 XRefEntry aAppearanceEntry
;
306 aAppearanceEntry
.SetOffset(m_aEditBuffer
.Tell());
307 aAppearanceEntry
.SetDirty(true);
308 m_aXRef
[nAppearanceId
] = aAppearanceEntry
;
309 m_aEditBuffer
.WriteStream(aEditBuffer
);
311 return nAppearanceId
;
314 sal_Int32
PDFDocument::WriteAnnotObject(PDFObjectElement
const& rFirstPage
, sal_Int32 nSignatureId
,
315 sal_Int32 nAppearanceId
,
316 const tools::Rectangle
& rSignatureRectangle
)
318 // Decide what identifier to use for the new signature.
319 sal_uInt32 nNextSignature
= GetNextSignature();
321 // Write the Annot object, references nSignatureId and nAppearanceId.
322 sal_Int32 nAnnotId
= m_aXRef
.size();
323 XRefEntry aAnnotEntry
;
324 aAnnotEntry
.SetOffset(m_aEditBuffer
.Tell());
325 aAnnotEntry
.SetDirty(true);
326 m_aXRef
[nAnnotId
] = aAnnotEntry
;
327 m_aEditBuffer
.WriteNumberAsString(nAnnotId
);
328 m_aEditBuffer
.WriteOString(" 0 obj\n");
329 m_aEditBuffer
.WriteOString("<</Type/Annot/Subtype/Widget/F 132\n");
330 m_aEditBuffer
.WriteOString("/Rect[0 0 ");
331 m_aEditBuffer
.WriteNumberAsString(rSignatureRectangle
.getOpenWidth());
332 m_aEditBuffer
.WriteOString(" ");
333 m_aEditBuffer
.WriteNumberAsString(rSignatureRectangle
.getOpenHeight());
334 m_aEditBuffer
.WriteOString("]\n");
335 m_aEditBuffer
.WriteOString("/FT/Sig\n");
336 m_aEditBuffer
.WriteOString("/P ");
337 m_aEditBuffer
.WriteNumberAsString(rFirstPage
.GetObjectValue());
338 m_aEditBuffer
.WriteOString(" 0 R\n");
339 m_aEditBuffer
.WriteOString("/T(Signature");
340 m_aEditBuffer
.WriteNumberAsString(nNextSignature
);
341 m_aEditBuffer
.WriteOString(")\n");
342 m_aEditBuffer
.WriteOString("/V ");
343 m_aEditBuffer
.WriteNumberAsString(nSignatureId
);
344 m_aEditBuffer
.WriteOString(" 0 R\n");
345 m_aEditBuffer
.WriteOString("/DV ");
346 m_aEditBuffer
.WriteNumberAsString(nSignatureId
);
347 m_aEditBuffer
.WriteOString(" 0 R\n");
348 m_aEditBuffer
.WriteOString("/AP<<\n/N ");
349 m_aEditBuffer
.WriteNumberAsString(nAppearanceId
);
350 m_aEditBuffer
.WriteOString(" 0 R\n>>\n");
351 m_aEditBuffer
.WriteOString(">>\nendobj\n\n");
// Appends an updated version of the page (or of its Annots array object) to m_aEditBuffer so
// that the annotation nAnnotId is listed among the page's annotations.
//
// Two shapes of the "Annots" entry are handled:
//  - a reference to a separate object: that object is rewritten as an array containing the
//    existing references plus nAnnotId;
//  - an inline array or no entry at all: the page object itself is rewritten, copying its
//    dictionary bytes from m_aEditBuffer and splicing in the new reference.
// In both cases the affected xref entry is marked dirty and pointed at the new copy.
// NOTE(review): some branch conditions and all return statements are not visible in this
// excerpt; the SAL_WARN calls mark the error paths.
356 bool PDFDocument::WritePageObject(PDFObjectElement
& rFirstPage
, sal_Int32 nAnnotId
)
358 PDFElement
* pAnnots
= rFirstPage
.Lookup("Annots"_ostr
);
359 auto pAnnotsReference
= dynamic_cast<PDFReferenceElement
*>(pAnnots
);
360 if (pAnnotsReference
)
362 // Write the updated Annots key of the Page object.
363 PDFObjectElement
* pAnnotsObject
= pAnnotsReference
->LookupObject();
366 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
// The rewritten Annots object is stored uncompressed at the current end of the buffer.
370 sal_uInt32 nAnnotsId
= pAnnotsObject
->GetObjectValue();
371 m_aXRef
[nAnnotsId
].SetType(XRefEntryType::NOT_COMPRESSED
);
372 m_aXRef
[nAnnotsId
].SetOffset(m_aEditBuffer
.Tell());
373 m_aXRef
[nAnnotsId
].SetDirty(true);
374 m_aEditBuffer
.WriteNumberAsString(nAnnotsId
);
375 m_aEditBuffer
.WriteOString(" 0 obj\n[");
377 // Write existing references.
378 PDFArrayElement
* pArray
= pAnnotsObject
->GetArray();
381 SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
// Array elements are written back as "<id> 0 R".
385 for (size_t i
= 0; i
< pArray
->GetElements().size(); ++i
)
387 auto pReference
= dynamic_cast<PDFReferenceElement
*>(pArray
->GetElements()[i
]);
392 m_aEditBuffer
.WriteOString(" ");
393 m_aEditBuffer
.WriteNumberAsString(pReference
->GetObjectValue());
394 m_aEditBuffer
.WriteOString(" 0 R");
396 // Write our reference.
397 m_aEditBuffer
.WriteOString(" ");
398 m_aEditBuffer
.WriteNumberAsString(nAnnotId
);
399 m_aEditBuffer
.WriteOString(" 0 R");
401 m_aEditBuffer
.WriteOString("]\nendobj\n\n");
405 // Write the updated first page object, references nAnnotId.
406 sal_uInt32 nFirstPageId
= rFirstPage
.GetObjectValue();
407 if (nFirstPageId
>= m_aXRef
.size())
409 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
412 m_aXRef
[nFirstPageId
].SetOffset(m_aEditBuffer
.Tell());
413 m_aXRef
[nFirstPageId
].SetDirty(true);
414 m_aEditBuffer
.WriteNumberAsString(nFirstPageId
);
415 m_aEditBuffer
.WriteOString(" 0 obj\n");
416 m_aEditBuffer
.WriteOString("<<");
417 auto pAnnotsArray
= dynamic_cast<PDFArrayElement
*>(pAnnots
);
420 // No Annots key, just write the key with a single reference.
// The original dictionary bytes are read back from m_aEditBuffer's own data at the page's
// dictionary offset and appended again.
421 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
422 + rFirstPage
.GetDictionaryOffset(),
423 rFirstPage
.GetDictionaryLength());
424 m_aEditBuffer
.WriteOString("/Annots[");
425 m_aEditBuffer
.WriteNumberAsString(nAnnotId
);
426 m_aEditBuffer
.WriteOString(" 0 R]");
430 // Annots key is already there, insert our reference at the end.
431 PDFDictionaryElement
* pDictionary
= rFirstPage
.GetDictionary();
433 // Offset right before the end of the Annots array.
434 sal_uInt64 nAnnotsEndOffset
= pDictionary
->GetKeyOffset("Annots"_ostr
)
435 + pDictionary
->GetKeyValueLength("Annots"_ostr
) - 1;
436 // Length of beginning of the dictionary -> Annots end.
437 sal_uInt64 nAnnotsBeforeEndLength
= nAnnotsEndOffset
- rFirstPage
.GetDictionaryOffset();
438 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
439 + rFirstPage
.GetDictionaryOffset(),
440 nAnnotsBeforeEndLength
);
441 m_aEditBuffer
.WriteOString(" ");
442 m_aEditBuffer
.WriteNumberAsString(nAnnotId
);
443 m_aEditBuffer
.WriteOString(" 0 R");
444 // Length of Annots end -> end of the dictionary.
445 sal_uInt64 nAnnotsAfterEndLength
= rFirstPage
.GetDictionaryOffset()
446 + rFirstPage
.GetDictionaryLength()
448 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
450 nAnnotsAfterEndLength
);
452 m_aEditBuffer
.WriteOString(">>");
453 m_aEditBuffer
.WriteOString("\nendobj\n\n");
// Appends an updated Catalog (or an updated AcroForm object) to m_aEditBuffer so that nAnnotId
// is listed in the AcroForm "Fields" array.
//
// pRoot (out): set to the "Root" reference looked up on m_pXRefStream or on m_pTrailer.
//
// Two shapes of the Catalog's "AcroForm" entry are handled:
//  - a reference to a separate object: that object is rewritten with nAnnotId appended to its
//    Fields array; its bytes are taken from the object's stream buffer when GetStreamBuffer()
//    returns one, otherwise from m_aEditBuffer at the object's dictionary offset;
//  - an inline dictionary or no entry at all: the Catalog itself is rewritten, either with a
//    new "/AcroForm<</Fields[...]/SigFlags 3>>" entry or with nAnnotId spliced into the
//    existing Fields array.
// NOTE(review): several branch conditions and all return statements are not visible in this
// excerpt; the SAL_WARN calls mark the error paths.
459 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId
, PDFReferenceElement
*& pRoot
)
462 pRoot
= dynamic_cast<PDFReferenceElement
*>(m_pXRefStream
->Lookup("Root"_ostr
));
467 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
470 pRoot
= dynamic_cast<PDFReferenceElement
*>(m_pTrailer
->Lookup("Root"_ostr
));
474 SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
477 PDFObjectElement
* pCatalog
= pRoot
->LookupObject();
480 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
483 sal_uInt32 nCatalogId
= pCatalog
->GetObjectValue();
484 if (nCatalogId
>= m_aXRef
.size())
486 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
489 PDFElement
* pAcroForm
= pCatalog
->Lookup("AcroForm"_ostr
);
490 auto pAcroFormReference
= dynamic_cast<PDFReferenceElement
*>(pAcroForm
);
491 if (pAcroFormReference
)
493 // Write the updated AcroForm key of the Catalog object.
494 PDFObjectElement
* pAcroFormObject
= pAcroFormReference
->LookupObject();
495 if (!pAcroFormObject
)
497 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
// The rewritten AcroForm object is stored uncompressed at the current end of the buffer.
501 sal_uInt32 nAcroFormId
= pAcroFormObject
->GetObjectValue();
502 m_aXRef
[nAcroFormId
].SetType(XRefEntryType::NOT_COMPRESSED
);
503 m_aXRef
[nAcroFormId
].SetOffset(m_aEditBuffer
.Tell());
504 m_aXRef
[nAcroFormId
].SetDirty(true);
505 m_aEditBuffer
.WriteNumberAsString(nAcroFormId
);
506 m_aEditBuffer
.WriteOString(" 0 obj\n");
508 // If this is nullptr, then the AcroForm object is not in an object stream.
509 SvMemoryStream
* pStreamBuffer
= pAcroFormObject
->GetStreamBuffer();
511 if (!pAcroFormObject
->Lookup("Fields"_ostr
))
513 SAL_WARN("vcl.filter",
514 "PDFDocument::Sign: AcroForm object without required Fields key");
518 PDFDictionaryElement
* pAcroFormDictionary
= pAcroFormObject
->GetDictionary();
519 if (!pAcroFormDictionary
)
521 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
525 // Offset right before the end of the Fields array.
526 sal_uInt64 nFieldsEndOffset
= pAcroFormDictionary
->GetKeyOffset("Fields"_ostr
)
527 + pAcroFormDictionary
->GetKeyValueLength("Fields"_ostr
)
530 // Length of beginning of the object dictionary -> Fields end.
531 sal_uInt64 nFieldsBeforeEndLength
= nFieldsEndOffset
;
// NOTE(review): the lines below belong to two alternative paths (stream buffer vs. edit
// buffer); the if/else that separates them is not visible in this excerpt.
533 m_aEditBuffer
.WriteBytes(pStreamBuffer
->GetData(), nFieldsBeforeEndLength
);
536 nFieldsBeforeEndLength
-= pAcroFormObject
->GetDictionaryOffset();
537 m_aEditBuffer
.WriteOString("<<");
538 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
539 + pAcroFormObject
->GetDictionaryOffset(),
540 nFieldsBeforeEndLength
);
543 // Append our reference at the end of the Fields array.
544 m_aEditBuffer
.WriteOString(" ");
545 m_aEditBuffer
.WriteNumberAsString(nAnnotId
);
546 m_aEditBuffer
.WriteOString(" 0 R");
548 // Length of Fields end -> end of the object dictionary.
551 sal_uInt64 nFieldsAfterEndLength
= pStreamBuffer
->GetSize() - nFieldsEndOffset
;
552 m_aEditBuffer
.WriteBytes(static_cast<const char*>(pStreamBuffer
->GetData())
554 nFieldsAfterEndLength
);
558 sal_uInt64 nFieldsAfterEndLength
= pAcroFormObject
->GetDictionaryOffset()
559 + pAcroFormObject
->GetDictionaryLength()
561 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
563 nFieldsAfterEndLength
);
564 m_aEditBuffer
.WriteOString(">>");
567 m_aEditBuffer
.WriteOString("\nendobj\n\n");
571 // Write the updated Catalog object, references nAnnotId.
572 auto pAcroFormDictionary
= dynamic_cast<PDFDictionaryElement
*>(pAcroForm
);
573 m_aXRef
[nCatalogId
].SetOffset(m_aEditBuffer
.Tell());
574 m_aXRef
[nCatalogId
].SetDirty(true);
575 m_aEditBuffer
.WriteNumberAsString(nCatalogId
);
576 m_aEditBuffer
.WriteOString(" 0 obj\n");
577 m_aEditBuffer
.WriteOString("<<");
578 if (!pAcroFormDictionary
)
580 // No AcroForm key, assume no signatures.
// The whole Catalog dictionary is copied and a fresh AcroForm entry is appended to it.
581 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
582 + pCatalog
->GetDictionaryOffset(),
583 pCatalog
->GetDictionaryLength());
584 m_aEditBuffer
.WriteOString("/AcroForm<</Fields[\n");
585 m_aEditBuffer
.WriteNumberAsString(nAnnotId
);
586 m_aEditBuffer
.WriteOString(" 0 R\n]/SigFlags 3>>\n");
590 // AcroForm key is already there, insert our reference at the Fields end.
591 auto it
= pAcroFormDictionary
->GetItems().find("Fields"_ostr
);
592 if (it
== pAcroFormDictionary
->GetItems().end())
594 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
598 auto pFields
= dynamic_cast<PDFArrayElement
*>(it
->second
);
601 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
605 // Offset right before the end of the Fields array.
606 sal_uInt64 nFieldsEndOffset
= pAcroFormDictionary
->GetKeyOffset("Fields"_ostr
)
607 + pAcroFormDictionary
->GetKeyValueLength("Fields"_ostr
)
609 // Length of beginning of the Catalog dictionary -> Fields end.
610 sal_uInt64 nFieldsBeforeEndLength
= nFieldsEndOffset
- pCatalog
->GetDictionaryOffset();
611 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
612 + pCatalog
->GetDictionaryOffset(),
613 nFieldsBeforeEndLength
);
614 m_aEditBuffer
.WriteOString(" ");
615 m_aEditBuffer
.WriteNumberAsString(nAnnotId
);
616 m_aEditBuffer
.WriteOString(" 0 R");
617 // Length of Fields end -> end of the Catalog dictionary.
618 sal_uInt64 nFieldsAfterEndLength
= pCatalog
->GetDictionaryOffset()
619 + pCatalog
->GetDictionaryLength() - nFieldsEndOffset
;
620 m_aEditBuffer
.WriteBytes(static_cast<const char*>(m_aEditBuffer
.GetData())
622 nFieldsAfterEndLength
);
624 m_aEditBuffer
.WriteOString(">>\nendobj\n\n");
// Appends the cross-reference information for all dirty entries of m_aXRef to m_aEditBuffer.
//
// nXRefOffset: offset recorded for the cross-reference stream object itself.
// pRoot: reference whose object/generation values are written as the /Root entry.
//
// Two output forms are produced by this function:
//  - a cross-reference stream object (binary rows of type / 3-byte offset / third field,
//    filtered with the "up" predictor and compressed with aZCodec), using the ID and Info
//    entries from m_pXRefStream;
//  - a classic "xref" table followed by a "trailer" dictionary, using the ID and Info entries
//    from m_pTrailer.
// NOTE(review): the condition that selects between the two forms, several null checks and the
// statements following the GetDirty() tests are not visible in this excerpt.
630 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset
, PDFReferenceElement
const* pRoot
)
634 // Write the xref stream.
635 // This is a bit meta: the xref stream stores its own offset.
636 sal_Int32 nXRefStreamId
= m_aXRef
.size();
637 XRefEntry aXRefStreamEntry
;
638 aXRefStreamEntry
.SetOffset(nXRefOffset
);
639 aXRefStreamEntry
.SetDirty(true);
640 m_aXRef
[nXRefStreamId
] = aXRefStreamEntry
;
642 // Write stream data.
643 SvMemoryStream aXRefStream
;
// NOTE(review): offsets are emitted in nOffsetLen (3) bytes below, so only the low 24 bits of
// GetOffset() end up in the stream.
644 const size_t nOffsetLen
= 3;
645 // 3 additional bytes: predictor, the first and the third field.
646 const size_t nLineLength
= nOffsetLen
+ 3;
647 // This is the line as it appears before tweaking according to the predictor.
648 std::vector
<unsigned char> aOrigLine(nLineLength
);
649 // This is the previous line.
650 std::vector
<unsigned char> aPrevLine(nLineLength
);
651 // This is the line as written to the stream.
652 std::vector
<unsigned char> aFilteredLine(nLineLength
);
653 for (const auto& rXRef
: m_aXRef
)
655 const XRefEntry
& rEntry
= rXRef
.second
;
657 if (!rEntry
.GetDirty())
662 // PNG prediction: up (on all rows).
663 aOrigLine
[nPos
++] = 2;
// First field: numeric code for the entry type; the per-case assignments to nType are not
// visible in this excerpt.
666 unsigned char nType
= 0;
667 switch (rEntry
.GetType())
669 case XRefEntryType::FREE
:
672 case XRefEntryType::NOT_COMPRESSED
:
675 case XRefEntryType::COMPRESSED
:
679 aOrigLine
[nPos
++] = nType
;
// Second field: the offset, one byte per iteration starting with the most significant one.
682 for (size_t i
= 0; i
< nOffsetLen
; ++i
)
684 size_t nByte
= nOffsetLen
- i
- 1;
685 // Fields requiring more than one byte are stored with the
686 // high-order byte first.
687 unsigned char nCh
= (rEntry
.GetOffset() & (0xff << (nByte
* 8))) >> (nByte
* 8);
688 aOrigLine
[nPos
++] = nCh
;
// Third field: always written as 0.
692 aOrigLine
[nPos
++] = 0;
694 // Now apply the predictor.
// Byte 0 (the predictor tag) is copied as is; every other byte is stored as the difference to
// the same column of the previously written row.
695 aFilteredLine
[0] = aOrigLine
[0];
696 for (size_t i
= 1; i
< nLineLength
; ++i
)
698 // Count the delta vs the previous line.
699 aFilteredLine
[i
] = aOrigLine
[i
] - aPrevLine
[i
];
700 // Remember the new reference.
701 aPrevLine
[i
] = aOrigLine
[i
];
704 aXRefStream
.WriteBytes(aFilteredLine
.data(), aFilteredLine
.size());
// Dictionary of the cross-reference stream object.
707 m_aEditBuffer
.WriteNumberAsString(nXRefStreamId
);
708 m_aEditBuffer
.WriteOString(
709 " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
712 auto pID
= dynamic_cast<PDFArrayElement
*>(m_pXRefStream
->Lookup("ID"_ostr
));
715 const std::vector
<PDFElement
*>& rElements
= pID
->GetElements();
716 m_aEditBuffer
.WriteOString("/ID [ <");
717 for (size_t i
= 0; i
< rElements
.size(); ++i
)
719 auto pIDString
= dynamic_cast<PDFHexStringElement
*>(rElements
[i
]);
723 m_aEditBuffer
.WriteOString(pIDString
->GetValue());
724 if ((i
+ 1) < rElements
.size())
725 m_aEditBuffer
.WriteOString("> <");
727 m_aEditBuffer
.WriteOString("> ] ");
// /Index lists each written entry as its own "<object number> 1" subsection.
731 m_aEditBuffer
.WriteOString("/Index [ ");
732 for (const auto& rXRef
: m_aXRef
)
734 if (!rXRef
.second
.GetDirty())
737 m_aEditBuffer
.WriteNumberAsString(rXRef
.first
);
738 m_aEditBuffer
.WriteOString(" 1 ");
740 m_aEditBuffer
.WriteOString("] ");
743 auto pInfo
= dynamic_cast<PDFReferenceElement
*>(m_pXRefStream
->Lookup("Info"_ostr
));
746 m_aEditBuffer
.WriteOString("/Info ");
747 m_aEditBuffer
.WriteNumberAsString(pInfo
->GetObjectValue());
748 m_aEditBuffer
.WriteOString(" ");
749 m_aEditBuffer
.WriteNumberAsString(pInfo
->GetGenerationValue());
750 m_aEditBuffer
.WriteOString(" R ");
754 m_aEditBuffer
.WriteOString("/Length ");
// The rows are compressed into aStream, then aXRefStream is emptied and refilled with the
// compressed bytes so that its size is the value written after /Length.
757 aZCodec
.BeginCompression();
759 SvMemoryStream aStream
;
760 aZCodec
.Compress(aXRefStream
, aStream
);
761 aZCodec
.EndCompression();
763 aXRefStream
.SetStreamSize(0);
765 aXRefStream
.WriteStream(aStream
);
767 m_aEditBuffer
.WriteNumberAsString(aXRefStream
.GetSize());
769 if (!m_aStartXRefs
.empty())
771 // Write location of the previous cross-reference section.
772 m_aEditBuffer
.WriteOString("/Prev ");
773 m_aEditBuffer
.WriteNumberAsString(m_aStartXRefs
.back());
777 m_aEditBuffer
.WriteOString("/Root ");
778 m_aEditBuffer
.WriteNumberAsString(pRoot
->GetObjectValue());
779 m_aEditBuffer
.WriteOString(" ");
780 m_aEditBuffer
.WriteNumberAsString(pRoot
->GetGenerationValue());
781 m_aEditBuffer
.WriteOString(" R ");
784 m_aEditBuffer
.WriteOString("/Size ");
785 m_aEditBuffer
.WriteNumberAsString(m_aXRef
.size());
787 m_aEditBuffer
.WriteOString("/Type/XRef/W[1 3 1]>>\nstream\n");
789 m_aEditBuffer
.WriteStream(aXRefStream
);
790 m_aEditBuffer
.WriteOString("\nendstream\nendobj\n\n");
794 // Write the xref table.
795 m_aEditBuffer
.WriteOString("xref\n");
796 for (const auto& rXRef
: m_aXRef
)
798 size_t nObject
= rXRef
.first
;
799 size_t nOffset
= rXRef
.second
.GetOffset();
800 if (!rXRef
.second
.GetDirty())
// Each written entry forms its own one-element subsection: "<object number> 1".
803 m_aEditBuffer
.WriteNumberAsString(nObject
);
804 m_aEditBuffer
.WriteOString(" 1\n");
// NOTE(review): the offset is narrowed to sal_Int32 before it is formatted and left-padded
// with zeros to 10 characters.
805 OStringBuffer aBuffer
= OString::number(static_cast<sal_Int32
>(nOffset
));
806 while (aBuffer
.getLength() < 10)
807 aBuffer
.insert(0, "0");
// NOTE(review): the condition choosing between the free ("f") and in-use ("n") suffix is not
// visible in this excerpt.
809 aBuffer
.append(" 65535 f \n");
811 aBuffer
.append(" 00000 n \n");
812 m_aEditBuffer
.WriteOString(aBuffer
);
815 // Write the trailer.
816 m_aEditBuffer
.WriteOString("trailer\n<</Size ");
817 m_aEditBuffer
.WriteNumberAsString(m_aXRef
.size());
818 m_aEditBuffer
.WriteOString("/Root ");
819 m_aEditBuffer
.WriteNumberAsString(pRoot
->GetObjectValue());
820 m_aEditBuffer
.WriteOString(" ");
821 m_aEditBuffer
.WriteNumberAsString(pRoot
->GetGenerationValue());
822 m_aEditBuffer
.WriteOString(" R\n");
823 auto pInfo
= dynamic_cast<PDFReferenceElement
*>(m_pTrailer
->Lookup("Info"_ostr
));
826 m_aEditBuffer
.WriteOString("/Info ");
827 m_aEditBuffer
.WriteNumberAsString(pInfo
->GetObjectValue());
828 m_aEditBuffer
.WriteOString(" ");
829 m_aEditBuffer
.WriteNumberAsString(pInfo
->GetGenerationValue());
830 m_aEditBuffer
.WriteOString(" R\n");
832 auto pID
= dynamic_cast<PDFArrayElement
*>(m_pTrailer
->Lookup("ID"_ostr
));
835 const std::vector
<PDFElement
*>& rElements
= pID
->GetElements();
836 m_aEditBuffer
.WriteOString("/ID [ <");
837 for (size_t i
= 0; i
< rElements
.size(); ++i
)
839 auto pIDString
= dynamic_cast<PDFHexStringElement
*>(rElements
[i
]);
843 m_aEditBuffer
.WriteOString(pIDString
->GetValue());
844 if ((i
+ 1) < rElements
.size())
845 m_aEditBuffer
.WriteOString(">\n<");
847 m_aEditBuffer
.WriteOString("> ]\n");
850 if (!m_aStartXRefs
.empty())
852 // Write location of the previous cross-reference section.
853 m_aEditBuffer
.WriteOString("/Prev ");
854 m_aEditBuffer
.WriteNumberAsString(m_aStartXRefs
.back());
857 m_aEditBuffer
.WriteOString(">>\n");
// Appends a signature to the document held in m_aEditBuffer.
//
// Steps visible here, in order: write the signature object, the appearance object, the
// annotation on the selected page, the updated page and catalog, the cross-reference data and
// the "startxref"/"%%EOF" footer; then patch the last ByteRange value, read the two byte ranges
// surrounding the reserved signature placeholder and write the hex-encoded signature into it.
//
// The signature is either computed via svl::crypto::Signing over the two ranges, or — when
// rSigningContext.m_aSignatureValue is not empty — taken from the context and hex-encoded.
// NOTE(review): the end of the parameter list (a bAdES value is passed on below), several
// conditions and all return statements are not visible in this excerpt.
861 bool PDFDocument::Sign(svl::crypto::SigningContext
& rSigningContext
, const OUString
& rDescription
,
// New content is appended at the end of the buffer, after a newline.
864 m_aEditBuffer
.Seek(STREAM_SEEK_TO_END
);
865 m_aEditBuffer
.WriteOString("\n");
867 sal_uInt64 nSignatureLastByteRangeOffset
= 0;
868 sal_Int64 nSignatureContentOffset
= 0;
869 sal_Int32 nSignatureId
870 = WriteSignatureObject(rSigningContext
, rDescription
, bAdES
, nSignatureLastByteRangeOffset
,
871 nSignatureContentOffset
);
872 assert(nSignatureContentOffset
> 0
873 && "WriteSignatureObject guarantees a length for nSignatureContentOffset");
874 tools::Rectangle aSignatureRectangle
;
875 sal_Int32 nAppearanceId
= WriteAppearanceObject(aSignatureRectangle
);
877 std::vector
<PDFObjectElement
*> aPages
= GetPages();
880 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
// m_nSignaturePage is only used when it is a valid index into aPages.
885 if (m_nSignaturePage
< aPages
.size())
887 nPage
= m_nSignaturePage
;
891 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to find page #" << nPage
);
895 PDFObjectElement
& rPage
= *aPages
[nPage
];
896 sal_Int32 nAnnotId
= WriteAnnotObject(rPage
, nSignatureId
, nAppearanceId
, aSignatureRectangle
);
898 if (!WritePageObject(rPage
, nAnnotId
))
900 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
904 PDFReferenceElement
* pRoot
= nullptr;
905 if (!WriteCatalogObject(nAnnotId
, pRoot
))
907 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
// The cross-reference data starts at the current position; the same offset is written after
// "startxref".
911 sal_uInt64 nXRefOffset
= m_aEditBuffer
.Tell();
912 WriteXRef(nXRefOffset
, pRoot
);
915 m_aEditBuffer
.WriteOString("startxref\n");
916 m_aEditBuffer
.WriteNumberAsString(nXRefOffset
);
917 m_aEditBuffer
.WriteOString("\n%%EOF\n");
919 // Finalize the signature, now that we know the total file size.
920 // Calculate the length of the last byte range.
921 sal_uInt64 nFileEnd
= m_aEditBuffer
.Tell();
922 sal_Int64 nLastByteRangeLength
923 = nFileEnd
- (nSignatureContentOffset
+ MAX_SIGNATURE_CONTENT_LENGTH
+ 1);
924 // Write the length to the buffer.
925 m_aEditBuffer
.Seek(nSignatureLastByteRangeOffset
);
926 OString aByteRangeBuffer
= OString::number(nLastByteRangeLength
) + " ]";
927 m_aEditBuffer
.WriteOString(aByteRangeBuffer
);
929 // Create the PKCS#7 object.
930 if (rSigningContext
.m_xCertificate
)
932 css::uno::Sequence
<sal_Int8
> aDerEncoded
= rSigningContext
.m_xCertificate
->getEncoded();
933 if (!aDerEncoded
.hasElements())
935 SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
// First range: from the start of the buffer up to nSignatureContentOffset - 1 bytes.
940 m_aEditBuffer
.Seek(0);
941 sal_uInt64 nBufferSize1
= nSignatureContentOffset
- 1;
942 std::unique_ptr
<char[]> aBuffer1(new char[nBufferSize1
]);
943 m_aEditBuffer
.ReadBytes(aBuffer1
.get(), nBufferSize1
);
// Second range: nLastByteRangeLength bytes starting behind the reserved placeholder.
945 m_aEditBuffer
.Seek(nSignatureContentOffset
+ MAX_SIGNATURE_CONTENT_LENGTH
+ 1);
946 sal_uInt64 nBufferSize2
= nLastByteRangeLength
;
947 std::unique_ptr
<char[]> aBuffer2(new char[nBufferSize2
]);
948 m_aEditBuffer
.ReadBytes(aBuffer2
.get(), nBufferSize2
);
950 OStringBuffer aCMSHexBuffer
;
951 if (rSigningContext
.m_aSignatureValue
.empty())
953 svl::crypto::Signing
aSigning(rSigningContext
);
954 aSigning
.AddDataRange(aBuffer1
.get(), nBufferSize1
);
955 aSigning
.AddDataRange(aBuffer2
.get(), nBufferSize2
);
956 if (!aSigning
.Sign(aCMSHexBuffer
))
958 if (rSigningContext
.m_xCertificate
.is())
960 SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
967 // The signature value provided by the context: use that instead of building a new
969 for (unsigned char ch
: rSigningContext
.m_aSignatureValue
)
971 svl::crypto::Signing::appendHex(ch
, aCMSHexBuffer
);
// NOTE(review): the size limit is only checked by this assert; no runtime check is visible in
// this excerpt.
975 assert(aCMSHexBuffer
.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH
);
// The hex string overwrites the beginning of the '0' placeholder reserved earlier.
977 m_aEditBuffer
.Seek(nSignatureContentOffset
);
978 m_aEditBuffer
.WriteOString(aCMSHexBuffer
);
983 bool PDFDocument::Write(SvStream
& rStream
)
985 m_aEditBuffer
.Seek(0);
986 rStream
.WriteStream(m_aEditBuffer
);
987 return rStream
.good();
// Tokenizes rStream into PDF elements that are appended to rElements.
//
// Characters are read one at a time; depending on the character a comment,
// dictionary / hex string, array, name, literal string, number or keyword
// element is created, pushed to rElements and asked to Read() itself from the
// stream. Each failing Read() is reported through SAL_WARN.
//
// Besides filling rElements, this records document-level lookup state:
// m_aOffsetObjects and m_aIDObjects for "obj" keywords, m_aOffsetTrailers and
// m_pTrailer for "trailer" keywords, and m_aStartXRefs / m_pXRefStream for the
// number that follows a "startxref" keyword.
//
// eMode: with TokenizeMode::EOF_TOKEN parsing is done once a comment was read
// and the stream position equals the last recorded EOF offset; with
// TokenizeMode::END_OF_OBJECT parsing is done after an "endobj" keyword.
// pObjectElement: initial value of the "last seen object" that arrays, names,
// numbers, references and streams are attached to.
// Callers in this file treat a false return value as a tokenizing failure.
990 bool PDFDocument::Tokenize(SvStream
& rStream
, TokenizeMode eMode
,
991 std::vector
<std::unique_ptr
<PDFElement
>>& rElements
,
992 PDFObjectElement
* pObjectElement
)
994 // Last seen object token.
995 PDFObjectElement
* pObject
= pObjectElement
;
996 PDFNameElement
* pObjectKey
= nullptr;
997 PDFObjectElement
* pObjectStream
= nullptr;
998 bool bInXRef
= false;
999 // The next number will be an xref offset.
1000 bool bInStartXRef
= false;
1001 // Dictionary depth, so we know when we're outside any dictionaries.
1003 // Last seen array token that's outside any dictionaries.
1004 PDFArrayElement
* pArray
= nullptr;
1005 // If we're inside an obj/endobj pair.
1006 bool bInObject
= false;
1011 rStream
.ReadChar(ch
);
1019 auto pComment
= new PDFCommentElement(*this);
1020 rElements
.push_back(std::unique_ptr
<PDFElement
>(pComment
));
// Step back so that the element's Read() sees the character that was
// just consumed to classify the token.
1021 rStream
.SeekRel(-1);
1022 if (!rElements
.back()->Read(rStream
))
1024 SAL_WARN("vcl.filter",
1025 "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
1028 if (eMode
== TokenizeMode::EOF_TOKEN
&& !m_aEOFs
.empty()
1029 && m_aEOFs
.back() == rStream
.Tell())
1031 // Found EOF and partial parsing requested, we're done.
1038 // Dictionary or hex string.
// One more character is read to tell the two apart, then both
// characters are given back to the stream.
1039 rStream
.ReadChar(ch
);
1040 rStream
.SeekRel(-2);
1043 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFDictionaryElement()));
1047 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFHexStringElement
));
1048 if (!rElements
.back()->Read(rStream
))
1050 SAL_WARN("vcl.filter",
1051 "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
1058 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFEndDictionaryElement()));
1060 rStream
.SeekRel(-1);
1061 if (!rElements
.back()->Read(rStream
))
1063 SAL_WARN("vcl.filter",
1064 "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
1071 auto pArr
= new PDFArrayElement(pObject
);
1072 rElements
.push_back(std::unique_ptr
<PDFElement
>(pArr
));
1075 // The array is attached directly, inform the object.
1079 pObject
->SetArray(pArray
);
1080 pObject
->SetArrayOffset(rStream
.Tell());
1084 rStream
.SeekRel(-1);
1085 if (!rElements
.back()->Read(rStream
))
1087 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
1094 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFEndArrayElement()));
1096 rStream
.SeekRel(-1);
// The array length is the distance from the offset stored by
// SetArrayOffset() to the current stream position.
1101 pObject
->SetArrayLength(rStream
.Tell() - pObject
->GetArrayOffset());
1104 if (!rElements
.back()->Read(rStream
))
1106 SAL_WARN("vcl.filter",
1107 "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1114 auto pNameElement
= new PDFNameElement();
1115 rElements
.push_back(std::unique_ptr
<PDFElement
>(pNameElement
));
1116 rStream
.SeekRel(-1);
1117 if (!pNameElement
->Read(rStream
))
1119 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
// A "Type" name followed by the "ObjStm" name marks the current object
// as an object stream; its stored objects are parsed at "endobj".
1123 if (pObject
&& pObjectKey
&& pObjectKey
->GetValue() == "Type"
1124 && pNameElement
->GetValue() == "ObjStm")
1125 pObjectStream
= pObject
;
1127 pObjectKey
= pNameElement
;
1129 if (bInObject
&& !nDepth
&& pObject
)
1131 // Name element inside an object, but outside a
1132 // dictionary / array: remember it.
1133 pObject
->SetNameElement(pNameElement
);
1140 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFLiteralStringElement
));
1141 rStream
.SeekRel(-1);
1142 if (!rElements
.back()->Read(rStream
))
1144 SAL_WARN("vcl.filter",
1145 "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1152 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch
)) || ch
== '-' || ch
== '+'
1155 // Numbering object: an integer or a real.
1156 auto pNumberElement
= new PDFNumberElement();
1157 rElements
.push_back(std::unique_ptr
<PDFElement
>(pNumberElement
));
1158 rStream
.SeekRel(-1);
1159 if (!pNumberElement
->Read(rStream
))
1161 SAL_WARN("vcl.filter",
1162 "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1167 bInStartXRef
= false;
1168 m_aStartXRefs
.push_back(pNumberElement
->GetValue());
// If an object starts exactly at the startxref offset, remember it as
// the cross-reference stream object.
1170 auto it
= m_aOffsetObjects
.find(pNumberElement
->GetValue());
1171 if (it
!= m_aOffsetObjects
.end())
1172 m_pXRefStream
= it
->second
;
1174 else if (bInObject
&& !nDepth
&& pObject
)
1175 // Number element inside an object, but outside a
1176 // dictionary / array: remember it.
1177 pObject
->SetNumberElement(pNumberElement
);
1179 else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch
)))
1181 // Possible keyword, like "obj".
1182 rStream
.SeekRel(-1);
1183 OString aKeyword
= ReadKeyword(rStream
);
1185 bool bObj
= aKeyword
== "obj";
1186 if (bObj
|| aKeyword
== "R")
1188 size_t nElements
= rElements
.size();
1191 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1192 "tokens before 'obj' or 'R' keyword");
// The two tokens preceding "obj" / "R" must be the object number and
// the generation number.
1197 = dynamic_cast<PDFNumberElement
*>(rElements
[nElements
- 2].get());
1198 auto pGenerationNumber
1199 = dynamic_cast<PDFNumberElement
*>(rElements
[nElements
- 1].get());
1200 if (!pObjectNumber
|| !pGenerationNumber
)
1202 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1203 "generation number before 'obj' or 'R' keyword");
1209 pObject
= new PDFObjectElement(*this, pObjectNumber
->GetValue(),
1210 pGenerationNumber
->GetValue());
1211 rElements
.push_back(std::unique_ptr
<PDFElement
>(pObject
));
// Make the object findable both by the stream offset of its object
// number token and by its object number.
1212 m_aOffsetObjects
[pObjectNumber
->GetLocation()] = pObject
;
1213 m_aIDObjects
[pObjectNumber
->GetValue()] = pObject
;
1218 auto pReference
= new PDFReferenceElement(*this, *pObjectNumber
,
1219 *pGenerationNumber
);
1220 rElements
.push_back(std::unique_ptr
<PDFElement
>(pReference
));
1221 if (bInObject
&& nDepth
> 0 && pObject
)
1222 // Inform the object about a new in-dictionary reference.
1223 pObject
->AddDictionaryReference(pReference
);
1225 if (!rElements
.back()->Read(rStream
))
1227 SAL_WARN("vcl.filter",
1228 "PDFDocument::Tokenize: PDFElement::Read() failed");
1232 else if (aKeyword
== "stream")
1234 // Look up the length of the stream from the parent object's dictionary.
1236 for (size_t nElement
= 0; nElement
< rElements
.size(); ++nElement
)
1238 // Iterate in reverse order.
1239 size_t nIndex
= rElements
.size() - nElement
- 1;
1240 PDFElement
* pElement
= rElements
[nIndex
].get();
1241 auto pObj
= dynamic_cast<PDFObjectElement
*>(pElement
);
1245 PDFElement
* pLookup
= pObj
->Lookup("Length"_ostr
);
1246 auto pReference
= dynamic_cast<PDFReferenceElement
*>(pLookup
);
1249 // Length is provided as a reference.
1250 nLength
= pReference
->LookupNumber(rStream
);
1254 auto pNumber
= dynamic_cast<PDFNumberElement
*>(pLookup
);
1257 // Length is provided directly.
1258 nLength
= pNumber
->GetValue();
1264 "PDFDocument::Tokenize: found no Length key for stream keyword");
1268 PDFDocument::SkipLineBreaks(rStream
);
1269 auto pStreamElement
= new PDFStreamElement(nLength
);
1271 pObject
->SetStream(pStreamElement
);
1272 rElements
.push_back(std::unique_ptr
<PDFElement
>(pStreamElement
));
1273 if (!rElements
.back()->Read(rStream
))
1275 SAL_WARN("vcl.filter",
1276 "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1280 else if (aKeyword
== "endstream")
1282 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFEndStreamElement
));
1283 if (!rElements
.back()->Read(rStream
))
1285 SAL_WARN("vcl.filter",
1286 "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1290 else if (aKeyword
== "endobj")
1292 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFEndObjectElement
));
1293 if (!rElements
.back()->Read(rStream
))
1295 SAL_WARN("vcl.filter",
1296 "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1299 if (eMode
== TokenizeMode::END_OF_OBJECT
)
1301 // Found endobj and only object parsing was requested, we're done.
1307 // We're at the end of an object stream, parse the stored objects.
1308 pObjectStream
->ParseStoredObjects();
1309 pObjectStream
= nullptr;
1310 pObjectKey
= nullptr;
1314 else if (aKeyword
== "true" || aKeyword
== "false")
1315 rElements
.push_back(std::unique_ptr
<PDFElement
>(
1316 new PDFBooleanElement(aKeyword
.toBoolean())));
1317 else if (aKeyword
== "null")
1318 rElements
.push_back(std::unique_ptr
<PDFElement
>(new PDFNullElement
));
1319 else if (aKeyword
== "xref")
1320 // Allow 'f' and 'n' keywords.
1322 else if (bInXRef
&& (aKeyword
== "f" || aKeyword
== "n"))
1325 else if (aKeyword
== "trailer")
1327 auto pTrailer
= new PDFTrailerElement(*this);
1329 // Make it possible to find this trailer later by offset.
1330 pTrailer
->Read(rStream
);
1331 m_aOffsetTrailers
[pTrailer
->GetLocation()] = pTrailer
;
1333 // When reading till the first EOF token only, remember
1334 // just the first trailer token.
1335 if (eMode
!= TokenizeMode::EOF_TOKEN
|| !m_pTrailer
)
1336 m_pTrailer
= pTrailer
;
1337 rElements
.push_back(std::unique_ptr
<PDFElement
>(pTrailer
));
1339 else if (aKeyword
== "startxref")
1341 bInStartXRef
= true;
1345 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1346 << aKeyword
<< "' keyword at byte position "
1353 auto uChar
= static_cast<unsigned char>(ch
);
1354 // Be more lenient and allow unexpected null char
1355 if (!rtl::isAsciiWhiteSpace(uChar
) && uChar
!= 0)
1357 SAL_WARN("vcl.filter",
1358 "PDFDocument::Tokenize: unexpected character with code "
1359 << sal_Int32(ch
) << " at byte position " << rStream
.Tell());
1362 SAL_WARN_IF(uChar
== 0, "vcl.filter",
1363 "PDFDocument::Tokenize: unexpected null character at "
1364 << rStream
.Tell() << " - ignoring");
1374 void PDFDocument::SetIDObject(size_t nID
, PDFObjectElement
* pObject
)
1376 m_aIDObjects
[nID
] = pObject
;
// Reads the document from rStream, with one fallback: when the first read
// attempt fails, rStream is converted with
// vcl::pdf::convertToHighestSupported() into an in-memory stream and Read()
// is run again on that converted copy. The result of the retry is returned.
1379 bool PDFDocument::ReadWithPossibleFixup(SvStream
& rStream
)
1384 // Read failed, try a roundtrip through pdfium and then retry.
1386 SvMemoryStream aStandardizedStream
;
1387 vcl::pdf::convertToHighestSupported(rStream
, aStandardizedStream
);
1388 return Read(aStandardizedStream
);
// Parses a PDF document from rStream.
//
// Steps visible here:
//  1. Verify the 5-byte "%PDF-" file magic.
//  2. Copy the stream into m_aEditBuffer so the document can be edited later.
//  3. Clear trailer / xref-stream state left over from an earlier attempt.
//  4. Locate the newest cross-reference section with FindStartXRef(); an
//     offset of 0 is reported as "found no xref start offset".
//  5. At each xref offset read a keyword: an empty keyword is handled by
//     ReadXRefStream(), otherwise the keyword has to be "xref" and the
//     trailer after it is tokenized up to the EOF token. The "Prev" key of
//     the trailer (or of the xref stream object) supplies the next offset.
//  6. Reset the per-pass state (but not the edit buffer) and tokenize the
//     stream with TokenizeMode::END_OF_STREAM; that result is returned.
1391 bool PDFDocument::Read(SvStream
& rStream
)
1393 // Check file magic.
1394 std::vector
<sal_Int8
> aHeader(5);
1396 rStream
.ReadBytes(aHeader
.data(), aHeader
.size());
1397 if (aHeader
[0] != '%' || aHeader
[1] != 'P' || aHeader
[2] != 'D' || aHeader
[3] != 'F'
1398 || aHeader
[4] != '-')
1400 SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1404 // Allow later editing of the contents in-memory.
1406 m_aEditBuffer
.WriteStream(rStream
);
1408 // clear out key items that may have been filled with info from any previous read attempt
1409 m_aOffsetTrailers
.clear();
1410 m_aTrailerOffsets
.clear();
1411 m_pTrailer
= nullptr;
1412 m_pXRefStream
= nullptr;
1414 // Look up the offset of the xref table.
1415 size_t nStartXRef
= FindStartXRef(rStream
);
1416 SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef
);
1417 if (nStartXRef
== 0)
1419 SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1424 rStream
.Seek(nStartXRef
);
// An empty keyword means the xref offset does not point at an "xref"
// keyword; that case is handled as a cross-reference stream.
1425 OString aKeyword
= ReadKeyword(rStream
);
1426 if (aKeyword
.isEmpty())
1427 ReadXRefStream(rStream
);
1431 if (aKeyword
!= "xref")
1433 SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1437 if (!Tokenize(rStream
, TokenizeMode::EOF_TOKEN
, m_aElements
, nullptr))
1439 SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1444 PDFNumberElement
* pPrev
= nullptr;
1447 pPrev
= dynamic_cast<PDFNumberElement
*>(m_pTrailer
->Lookup("Prev"_ostr
));
1449 // Remember the offset of this trailer in the correct order. It's
1450 // possible that newer trailers don't have a larger offset.
1451 m_aTrailerOffsets
.push_back(m_pTrailer
->GetLocation());
1453 else if (m_pXRefStream
)
1454 pPrev
= dynamic_cast<PDFNumberElement
*>(m_pXRefStream
->Lookup("Prev"_ostr
));
1456 nStartXRef
= pPrev
->GetValue();
1458 // Reset state, except the edit buffer.
1459 m_aOffsetTrailers
.clear(); // contents are lifecycle managed by m_aElements
1460 m_aElements
.clear();
1461 m_aOffsetObjects
.clear();
1462 m_aIDObjects
.clear();
1463 m_aStartXRefs
.clear();
1465 m_pTrailer
= nullptr;
1466 m_pXRefStream
= nullptr;
1471 // Then we can tokenize the stream.
1473 return Tokenize(rStream
, TokenizeMode::END_OF_STREAM
, m_aElements
, nullptr);
// Reads a keyword from rStream: characters are consumed for as long as they
// are ASCII letters and the collected text is returned as an OString.
// On the regular exit path the stream is moved back by one, so that the
// character that ended the keyword is not consumed.
1476 OString
PDFDocument::ReadKeyword(SvStream
& rStream
)
1480 rStream
.ReadChar(ch
);
1483 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch
)))
1486 rStream
.ReadChar(ch
);
1488 return aBuf
.toString();
1490 rStream
.SeekRel(-1);
1491 return aBuf
.toString();
// Finds the offset announced by the last "startxref" keyword of the document.
//
// Up to the final 1024 bytes of rStream are read into a buffer (the whole
// stream if it is shorter), the buffer is searched for every occurrence of
// "startxref", and the number following the last occurrence is parsed with
// PDFNumberElement::Read() and returned. Failures are logged with SAL_WARN;
// PDFDocument::Read() treats a result of 0 as "no xref start offset found".
1494 size_t PDFDocument::FindStartXRef(SvStream
& rStream
)
1496 // Find the "startxref" token, somewhere near the end of the document.
1497 std::vector
<char> aBuf(1024);
1498 rStream
.Seek(STREAM_SEEK_TO_END
);
1499 if (rStream
.Tell() > aBuf
.size())
1500 rStream
.SeekRel(static_cast<sal_Int64
>(-1) * aBuf
.size());
1502 // The document is really short, then just read it from the start.
// Peek: remember the position, read the tail, then seek back so that the
// later relative seek is based on the start of the buffer.
1504 size_t nBeforePeek
= rStream
.Tell();
1505 size_t nSize
= rStream
.ReadBytes(aBuf
.data(), aBuf
.size());
1506 rStream
.Seek(nBeforePeek
);
1507 if (nSize
!= aBuf
.size())
1509 OString
aPrefix("startxref"_ostr
);
1510 // Find the last startxref at the end of the document.
1511 auto itLastValid
= aBuf
.end();
1512 auto it
= aBuf
.begin();
1515 it
= std::search(it
, aBuf
.end(), aPrefix
.getStr(), aPrefix
.getStr() + aPrefix
.getLength());
1516 if (it
== aBuf
.end())
1522 if (itLastValid
== aBuf
.end())
1524 SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
// Move to just after the last "startxref" keyword.
1528 rStream
.SeekRel(itLastValid
- aBuf
.begin() + aPrefix
.getLength());
1531 SAL_WARN("vcl.filter",
1532 "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1536 PDFDocument::SkipWhitespace(rStream
);
1537 PDFNumberElement aNumber
;
1538 if (!aNumber
.Read(rStream
))
1540 return aNumber
.GetValue();
// Reads a cross-reference stream object from rStream and merges its entries
// into m_aXRef.
//
// Outline of the visible steps:
//  - Tokenize one object (TokenizeMode::END_OF_OBJECT) and locate the object
//    and stream elements among m_aElements; the object is remembered in
//    m_pXRefStream so that its "Prev" key can be looked up later.
//  - Read "Length" bytes at the stream offset; the "Filter" has to be the
//    name "FlateDecode", and the data is inflated with ZCodec.
//  - Optional "DecodeParms" provide "Columns" and "Predictor".
//  - "Index" (pairs of first object / number of objects) or, without it,
//    "Size" determine which object numbers the entries describe.
//  - "W" must have at least 3 numeric elements giving the byte width of the
//    type, offset and generation fields; a row is one predictor byte plus
//    these widths.
//  - For every row the predictor is applied, the three big-endian fields are
//    decoded, and the entry is stored only if m_aXRef has no entry for that
//    index yet.
// Every error path is reported through SAL_WARN.
1543 void PDFDocument::ReadXRefStream(SvStream
& rStream
)
1545 // Look up the stream length in the object dictionary.
1546 if (!Tokenize(rStream
, TokenizeMode::END_OF_OBJECT
, m_aElements
, nullptr))
1548 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1552 if (m_aElements
.empty())
1554 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1558 PDFObjectElement
* pObject
= nullptr;
1559 for (const auto& pElement
: m_aElements
)
1561 if (auto pObj
= dynamic_cast<PDFObjectElement
*>(pElement
.get()))
1569 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1573 // So that the Prev key can be looked up later.
1574 m_pXRefStream
= pObject
;
1576 PDFElement
* pLookup
= pObject
->Lookup("Length"_ostr
);
1577 auto pNumber
= dynamic_cast<PDFNumberElement
*>(pLookup
);
1580 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1583 sal_uInt64 nLength
= pNumber
->GetValue();
1585 // Look up the stream offset.
1586 PDFStreamElement
* pStream
= nullptr;
1587 for (const auto& pElement
: m_aElements
)
1589 if (auto pS
= dynamic_cast<PDFStreamElement
*>(pElement
.get()))
1597 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1601 // Read and decompress it.
1602 rStream
.Seek(pStream
->GetOffset());
// NOTE(review): nLength comes straight from the file's Length key and sizes
// this buffer; the ReadBytes() result is not checked in the lines shown.
1603 std::vector
<char> aBuf(nLength
);
1604 rStream
.ReadBytes(aBuf
.data(), aBuf
.size());
1606 auto pFilter
= dynamic_cast<PDFNameElement
*>(pObject
->Lookup("Filter"_ostr
));
1609 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1613 if (pFilter
->GetValue() != "FlateDecode")
1615 SAL_WARN("vcl.filter",
1616 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter
->GetValue());
1622 if (auto pDecodeParams
1623 = dynamic_cast<PDFDictionaryElement
*>(pObject
->Lookup("DecodeParms"_ostr
)))
1625 const std::map
<OString
, PDFElement
*>& rItems
= pDecodeParams
->GetItems();
1626 auto it
= rItems
.find("Columns"_ostr
);
1627 if (it
!= rItems
.end())
1628 if (auto pColumns
= dynamic_cast<PDFNumberElement
*>(it
->second
))
1629 nColumns
= pColumns
->GetValue();
1630 it
= rItems
.find("Predictor"_ostr
);
1631 if (it
!= rItems
.end())
1632 if (auto pPredictor
= dynamic_cast<PDFNumberElement
*>(it
->second
))
1633 nPredictor
= pPredictor
->GetValue();
1636 SvMemoryStream
aSource(aBuf
.data(), aBuf
.size(), StreamMode::READ
);
1637 SvMemoryStream aStream
;
1639 aZCodec
.BeginCompression();
1640 aZCodec
.Decompress(aSource
, aStream
);
1641 if (!aZCodec
.EndCompression())
1643 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1647 // Look up the first and the last entry we need to read.
1648 auto pIndex
= dynamic_cast<PDFArrayElement
*>(pObject
->Lookup("Index"_ostr
));
1649 std::vector
<size_t> aFirstObjects
;
1650 std::vector
<size_t> aNumberOfObjects
;
1653 auto pSize
= dynamic_cast<PDFNumberElement
*>(pObject
->Lookup("Size"_ostr
));
// Without an Index array a single subsection starting at object 0 with
// Size entries is used.
1656 aFirstObjects
.push_back(0);
1657 aNumberOfObjects
.push_back(pSize
->GetValue());
1661 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1667 const std::vector
<PDFElement
*>& rIndexElements
= pIndex
->GetElements();
1668 size_t nFirstObject
= 0;
1669 for (size_t i
= 0; i
< rIndexElements
.size(); ++i
)
1673 auto pFirstObject
= dynamic_cast<PDFNumberElement
*>(rIndexElements
[i
]);
1676 SAL_WARN("vcl.filter",
1677 "PDFDocument::ReadXRefStream: Index has no first object");
1680 nFirstObject
= pFirstObject
->GetValue();
1684 auto pNumberOfObjects
= dynamic_cast<PDFNumberElement
*>(rIndexElements
[i
]);
1685 if (!pNumberOfObjects
)
1687 SAL_WARN("vcl.filter",
1688 "PDFDocument::ReadXRefStream: Index has no number of objects");
1691 aFirstObjects
.push_back(nFirstObject
);
1692 aNumberOfObjects
.push_back(pNumberOfObjects
->GetValue());
1696 // Look up the format of a single entry.
1697 const int nWSize
= 3;
1698 auto pW
= dynamic_cast<PDFArrayElement
*>(pObject
->Lookup("W"_ostr
));
1699 if (!pW
|| pW
->GetElements().size() < nWSize
)
1701 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1705 // First character is the (kind of) repeated predictor.
1706 int nLineLength
= 1;
1707 for (size_t i
= 0; i
< nWSize
; ++i
)
1709 auto pI
= dynamic_cast<PDFNumberElement
*>(pW
->GetElements()[i
]);
1712 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1715 aW
[i
] = pI
->GetValue();
1716 nLineLength
+= aW
[i
];
// With a predictor in use, Columns has to equal the sum of the W widths
// (the line length without its leading predictor byte).
1719 if (nPredictor
> 1 && nLineLength
- 1 != nColumns
)
1721 SAL_WARN("vcl.filter",
1722 "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1727 for (size_t nSubSection
= 0; nSubSection
< aFirstObjects
.size(); ++nSubSection
)
1729 size_t nFirstObject
= aFirstObjects
[nSubSection
];
1730 size_t nNumberOfObjects
= aNumberOfObjects
[nSubSection
];
1732 // This is the line as read from the stream.
1733 std::vector
<unsigned char> aOrigLine(nLineLength
);
1734 // This is the line as it appears after tweaking according to nPredictor.
1735 std::vector
<unsigned char> aFilteredLine(nLineLength
);
1736 for (size_t nEntry
= 0; nEntry
< nNumberOfObjects
; ++nEntry
)
1738 size_t nIndex
= nFirstObject
+ nEntry
;
1740 aStream
.ReadBytes(aOrigLine
.data(), aOrigLine
.size());
// The first byte of each row plus 10 has to match the declared
// Predictor value.
1741 if (nPredictor
> 1 && aOrigLine
[0] + 10 != nPredictor
)
1743 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1744 "inconsistent with /DecodeParms/Predictor for object #"
1749 for (int i
= 0; i
< nLineLength
; ++i
)
1757 // PNG prediction: up (on all rows).
1758 aFilteredLine
[i
] = aFilteredLine
[i
] + aOrigLine
[i
];
1761 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1767 // First character is already handled above.
1770 // Start of the current field in the stream data.
// Each of the three fields below is assembled most significant byte first.
1772 for (; nPos
< nOffset
+ aW
[0]; ++nPos
)
1774 unsigned char nCh
= aFilteredLine
[nPos
];
1775 nType
= (nType
<< 8) + nCh
;
1778 // Start of the object in the file stream.
1779 size_t nStreamOffset
= 0;
1781 for (; nPos
< nOffset
+ aW
[1]; ++nPos
)
1783 unsigned char nCh
= aFilteredLine
[nPos
];
1784 nStreamOffset
= (nStreamOffset
<< 8) + nCh
;
1787 // Generation number of the object.
1788 size_t nGenerationNumber
= 0;
1790 for (; nPos
< nOffset
+ aW
[2]; ++nPos
)
1792 unsigned char nCh
= aFilteredLine
[nPos
];
1793 nGenerationNumber
= (nGenerationNumber
<< 8) + nCh
;
1796 // Ignore invalid nType.
// Only indexes that have no entry yet are stored; existing entries are
// left untouched.
1799 if (m_aXRef
.find(nIndex
) == m_aXRef
.end())
1805 aEntry
.SetType(XRefEntryType::FREE
);
1808 aEntry
.SetType(XRefEntryType::NOT_COMPRESSED
);
1811 aEntry
.SetType(XRefEntryType::COMPRESSED
);
1814 aEntry
.SetOffset(nStreamOffset
);
1815 m_aXRef
[nIndex
] = aEntry
;
// Reads a classic (textual) cross-reference table from rStream into m_aXRef.
//
// Each subsection starts with a first object number and a number of entries;
// both are rejected when negative. Every entry consists of an offset, a
// generation number and the keyword "f" or "n". When the next token is not a
// number, the table is over and the trailer follows. Entries already present
// in m_aXRef are not overwritten; new entries are marked dirty.
1822 void PDFDocument::ReadXRef(SvStream
& rStream
)
1824 PDFDocument::SkipWhitespace(rStream
);
1828 PDFNumberElement aFirstObject
;
1829 if (!aFirstObject
.Read(rStream
))
1831 // Next token is not a number, it'll be the trailer.
1835 if (aFirstObject
.GetValue() < 0)
1837 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1841 PDFDocument::SkipWhitespace(rStream
);
1842 PDFNumberElement aNumberOfEntries
;
1843 if (!aNumberOfEntries
.Read(rStream
))
1845 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1849 if (aNumberOfEntries
.GetValue() < 0)
1851 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1855 size_t nSize
= aNumberOfEntries
.GetValue();
1856 for (size_t nEntry
= 0; nEntry
< nSize
; ++nEntry
)
// Object number described by this entry.
1858 size_t nIndex
= aFirstObject
.GetValue() + nEntry
;
1859 PDFDocument::SkipWhitespace(rStream
);
1860 PDFNumberElement aOffset
;
1861 if (!aOffset
.Read(rStream
))
1863 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1867 PDFDocument::SkipWhitespace(rStream
);
1868 PDFNumberElement aGenerationNumber
;
1869 if (!aGenerationNumber
.Read(rStream
))
1871 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1875 PDFDocument::SkipWhitespace(rStream
);
1876 OString aKeyword
= ReadKeyword(rStream
);
1877 if (aKeyword
!= "f" && aKeyword
!= "n")
1879 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1882 // xrefs are read in reverse order, so never update an existing
1883 // offset with an older one.
1884 if (m_aXRef
.find(nIndex
) == m_aXRef
.end())
1887 aEntry
.SetOffset(aOffset
.GetValue());
1888 // Initially only the first entry is dirty.
1890 aEntry
.SetDirty(true);
1891 m_aXRef
[nIndex
] = aEntry
;
1893 PDFDocument::SkipWhitespace(rStream
);
// Advances rStream past ASCII whitespace: characters are read one by one and,
// on the first character that is not ASCII whitespace, the stream is moved
// back by one so that this character is the next one to be read.
1898 void PDFDocument::SkipWhitespace(SvStream
& rStream
)
1904 rStream
.ReadChar(ch
);
1908 if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch
)))
1910 rStream
.SeekRel(-1);
// Advances rStream past line break characters only ('\n' and '\r'): on the
// first character that is neither, the stream is moved back by one so that
// this character is the next one to be read. Unlike SkipWhitespace(), other
// whitespace such as spaces is not skipped.
1916 void PDFDocument::SkipLineBreaks(SvStream
& rStream
)
1922 rStream
.ReadChar(ch
);
1926 if (ch
!= '\n' && ch
!= '\r')
1928 rStream
.SeekRel(-1);
// Returns the offset stored in the cross-reference entry of object nIndex.
// The lookup is reported as failed (SAL_WARN) when m_aXRef has no entry for
// nIndex or when the entry is of type XRefEntryType::COMPRESSED.
1934 size_t PDFDocument::GetObjectOffset(size_t nIndex
) const
1936 auto it
= m_aXRef
.find(nIndex
);
1937 if (it
== m_aXRef
.end() || it
->second
.GetType() == XRefEntryType::COMPRESSED
)
1939 SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1940 << nIndex
<< ", but failed");
1944 return it
->second
.GetOffset();
// Read-only access to a vector of owned PDFElement objects.
// NOTE(review): presumably this is the m_aElements vector filled by
// Tokenize() - confirm against the function body.
1947 const std::vector
<std::unique_ptr
<PDFElement
>>& PDFDocument::GetElements() const
1952 /// Visits the page tree recursively, looking for page objects.
///
/// pPages is a page tree node; its "Kids" entry is used either as a direct
/// array or as a reference to an object that holds the array. Each kid
/// reference is resolved: a kid whose "Type" is "Pages" is visited
/// recursively, any other kid is appended to rRet as an actual page.
/// pPages is flagged as "visiting" for the duration of the walk, and a kid
/// that is already being visited is reported as a loop in the hierarchy.
1953 static void visitPages(PDFObjectElement
* pPages
, std::vector
<PDFObjectElement
*>& rRet
)
1955 auto pKidsRef
= pPages
->Lookup("Kids"_ostr
);
1956 auto pKids
= dynamic_cast<PDFArrayElement
*>(pKidsRef
);
1959 auto pRefKids
= dynamic_cast<PDFReferenceElement
*>(pKidsRef
);
1962 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1965 auto pObjWithKids
= pRefKids
->LookupObject();
1968 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1972 pKids
= pObjWithKids
->GetArray();
1977 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1981 pPages
->setVisiting(true);
1983 for (const auto& pKid
: pKids
->GetElements())
1985 auto pReference
= dynamic_cast<PDFReferenceElement
*>(pKid
);
1989 PDFObjectElement
* pKidObject
= pReference
->LookupObject();
1993 // detect if visiting reenters itself
1994 if (pKidObject
->alreadyVisiting())
1996 SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
2000 auto pName
= dynamic_cast<PDFNameElement
*>(pKidObject
->Lookup("Type"_ostr
));
2001 if (pName
&& pName
->GetValue() == "Pages")
2002 // Pages inside pages: recurse.
2003 visitPages(pKidObject
, rRet
);
2005 // Found an actual page.
2006 rRet
.push_back(pKidObject
);
2009 pPages
->setVisiting(false);
// Returns the object the document's "Root" key refers to (the catalog).
//
// The "Root" reference is taken from the trailer registered for the first
// entry of m_aTrailerOffsets when there is one, otherwise from the
// cross-reference stream object (m_pXRefStream). A missing "Root" key is
// reported with SAL_WARN.
2012 PDFObjectElement
* PDFDocument::GetCatalog()
2014 PDFReferenceElement
* pRoot
= nullptr;
2016 PDFTrailerElement
* pTrailer
= nullptr;
2017 if (!m_aTrailerOffsets
.empty())
2019 // Get access to the latest trailer, and work with the keys of that
2021 auto it
= m_aOffsetTrailers
.find(m_aTrailerOffsets
[0]);
2022 if (it
!= m_aOffsetTrailers
.end())
2023 pTrailer
= it
->second
;
2027 pRoot
= dynamic_cast<PDFReferenceElement
*>(pTrailer
->Lookup("Root"_ostr
));
2028 else if (m_pXRefStream
)
2029 pRoot
= dynamic_cast<PDFReferenceElement
*>(m_pXRefStream
->Lookup("Root"_ostr
));
2033 SAL_WARN("vcl.filter", "PDFDocument::GetCatalog: trailer has no Root key");
2037 return pRoot
->LookupObject();
// Collects the page objects of the document: the catalog from GetCatalog()
// is asked for its "Pages" object and the page tree below it is walked with
// visitPages(). A missing catalog or a catalog without a "Pages" object is
// reported with SAL_WARN.
2040 std::vector
<PDFObjectElement
*> PDFDocument::GetPages()
2042 std::vector
<PDFObjectElement
*> aRet
;
2044 PDFObjectElement
* pCatalog
= GetCatalog();
2047 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
2051 PDFObjectElement
* pPages
= pCatalog
->LookupObject("Pages"_ostr
);
2054 SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog
->GetObjectValue()
2055 << ") has no pages");
2059 visitPages(pPages
, aRet
);
2064 void PDFDocument::PushBackEOF(size_t nOffset
) { m_aEOFs
.push_back(nOffset
); }
// Collects the signature widget annotations of all pages.
//
// For every page from GetPages() the "Annots" entry is used either as a
// direct array or as a reference to an object holding the array. Each
// annotation reference is resolved, and the annotation object is added to
// the result when its "FT" name is "Sig".
2066 std::vector
<PDFObjectElement
*> PDFDocument::GetSignatureWidgets()
2068 std::vector
<PDFObjectElement
*> aRet
;
2070 std::vector
<PDFObjectElement
*> aPages
= GetPages();
2072 for (const auto& pPage
: aPages
)
2077 PDFElement
* pAnnotsElement
= pPage
->Lookup("Annots"_ostr
);
2078 auto pAnnots
= dynamic_cast<PDFArrayElement
*>(pAnnotsElement
);
2081 // Annots is not an array, see if it's a reference to an object
2082 // with a direct array.
2083 auto pAnnotsRef
= dynamic_cast<PDFReferenceElement
*>(pAnnotsElement
);
2086 if (PDFObjectElement
* pAnnotsObject
= pAnnotsRef
->LookupObject())
2088 pAnnots
= pAnnotsObject
->GetArray();
2096 for (const auto& pAnnot
: pAnnots
->GetElements())
2098 auto pReference
= dynamic_cast<PDFReferenceElement
*>(pAnnot
);
2102 PDFObjectElement
* pAnnotObject
= pReference
->LookupObject();
2106 auto pFT
= dynamic_cast<PDFNameElement
*>(pAnnotObject
->Lookup("FT"_ostr
));
2107 if (!pFT
|| pFT
->GetValue() != "Sig")
2110 aRet
.push_back(pAnnotObject
);
2117 std::vector
<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement
const* pElement
)
2119 return svl::crypto::DecodeHexString(pElement
->GetValue());
// Decodes a hex string element that holds UTF-16BE text into an OUString.
//
// The decoded bytes have to start with the byte order mark 0xFE 0xFF and have
// an even length; input that fails this check is handled separately. After
// the mark, every byte pair is combined (high byte first) into one UTF-16
// code unit.
2122 OUString
PDFDocument::DecodeHexStringUTF16BE(PDFHexStringElement
const& rElement
)
2124 std::vector
<unsigned char> const encoded(DecodeHexString(&rElement
));
2125 // Text strings can be PDF-DocEncoding or UTF-16BE with mandatory BOM;
2126 // only the latter is supported here
2127 if (encoded
.size() < 2 || encoded
[0] != 0xFE || encoded
[1] != 0xFF || (encoded
.size() & 1) != 0)
2131 OUStringBuffer
buf(encoded
.size() - 2);
// Start at 2 to skip the byte order mark.
2132 for (size_t i
= 2; i
< encoded
.size(); i
+= 2)
2134 buf
.append(sal_Unicode((static_cast<sal_uInt16
>(encoded
[i
]) << 8) | encoded
[i
+ 1]));
2136 return buf
.makeStringAndClear();
// Creates a comment element that belongs to rDoc; Read() reports %%EOF
// comments back to that document through m_rDoc.
2139 PDFCommentElement::PDFCommentElement(PDFDocument
& rDoc
)
// Reads one comment line from rStream into m_aComment.
//
// The comment ends at '\n', '\r' or the end of the stream. When the comment
// starts with "%%EOF", the current stream position is reported to the
// document via PushBackEOF(); the next character is peeked (read, then the
// stream is moved back) for the "\r\n" handling described below.
2144 bool PDFCommentElement::Read(SvStream
& rStream
)
2146 // Read from (including) the % char till (excluding) the end of the line/stream.
2149 rStream
.ReadChar(ch
);
2152 if (ch
== '\n' || ch
== '\r' || rStream
.eof())
2154 m_aComment
= aBuf
.makeStringAndClear();
2156 if (m_aComment
.startsWith("%%EOF"))
2158 sal_uInt64 nPos
= rStream
.Tell();
2161 rStream
.ReadChar(ch
);
2162 rStream
.SeekRel(-1);
2163 // If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat
2170 m_rDoc
.PushBackEOF(nPos
);
2173 SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment
<< "'");
2177 rStream
.ReadChar(ch
);
// Compiler-generated default constructor; the element's value, offset and
// length are set by Read().
2183 PDFNumberElement::PDFNumberElement() = default;
// Reads a numeric token from rStream.
//
// The start position is remembered in m_nOffset. The first character has to
// be an ASCII digit, '-', '+' or '.'; otherwise it is given back to the
// stream. Characters are then collected until one that cannot be part of the
// number is found: that character is given back, m_nLength is set to the
// number of bytes consumed and m_fValue to the collected text converted with
// o3tl::toDouble().
2185 bool PDFNumberElement::Read(SvStream
& rStream
)
2188 m_nOffset
= rStream
.Tell();
2190 rStream
.ReadChar(ch
);
2195 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch
)) && ch
!= '-' && ch
!= '+' && ch
!= '.')
2197 rStream
.SeekRel(-1);
2200 while (!rStream
.eof())
2202 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch
)) && ch
!= '-' && ch
!= '+'
2205 rStream
.SeekRel(-1);
2206 m_nLength
= rStream
.Tell() - m_nOffset
;
2207 m_fValue
= o3tl::toDouble(aBuf
);
2209 SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue
<< "'");
2213 rStream
.ReadChar(ch
);
2219 sal_uInt64
PDFNumberElement::GetLocation() const { return m_nOffset
; }
2221 sal_uInt64
PDFNumberElement::GetLength() const { return m_nLength
; }
2223 bool PDFBooleanElement::Read(SvStream
& /*rStream*/) { return true; }
2225 bool PDFNullElement::Read(SvStream
& /*rStream*/) { return true; }
// Reads a hex string token from rStream into m_aValue.
//
// The first character has to be '<' (anything else is logged via SAL_INFO).
// The following characters are collected until the string is complete, at
// which point the collected text becomes m_aValue.
// NOTE(review): the terminating condition is not part of the lines shown
// here - presumably the closing '>'; confirm against the full function.
2227 bool PDFHexStringElement::Read(SvStream
& rStream
)
2230 rStream
.ReadChar(ch
);
2233 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2236 rStream
.ReadChar(ch
);
2239 while (!rStream
.eof())
2243 m_aValue
= aBuf
.makeStringAndClear();
2244 SAL_INFO("vcl.filter",
2245 "PDFHexStringElement::Read: m_aValue length is " << m_aValue
.getLength());
2249 rStream
.ReadChar(ch
);
2255 const OString
& PDFHexStringElement::GetValue() const { return m_aValue
; }
2257 bool PDFLiteralStringElement::Read(SvStream
& rStream
)
2261 rStream
.ReadChar(ch
);
2264 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2268 rStream
.ReadChar(ch
);
2270 // Start with 1 nesting level as we read a '(' above already.
2273 while (!rStream
.eof())
2275 if (ch
== '(' && nPrevCh
!= '\\')
2278 if (ch
== ')' && nPrevCh
!= '\\')
2283 // ')' of the outermost '(' is reached.
2284 m_aValue
= aBuf
.makeStringAndClear();
2285 SAL_INFO("vcl.filter",
2286 "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue
<< "'");
2291 rStream
.ReadChar(ch
);
2297 const OString
& PDFLiteralStringElement::GetValue() const { return m_aValue
; }
// Creates a trailer element for rDoc. The trailer dictionary pointer starts
// as nullptr and is filled in lazily by Lookup().
2299 PDFTrailerElement::PDFTrailerElement(PDFDocument
& rDoc
)
2301 , m_pDictionaryElement(nullptr)
// Remembers the current position of rStream as the location of this trailer
// (see GetLocation()).
2305 bool PDFTrailerElement::Read(SvStream
& rStream
)
2307 m_nOffset
= rStream
.Tell();
// Looks up rDictionaryKey in the trailer dictionary.
//
// The dictionary is parsed on first use: while m_pDictionaryElement is unset,
// a PDFObjectParser over the document's elements is run for this trailer.
// The case where the dictionary is still unset afterwards is handled
// separately from the regular lookup.
2311 PDFElement
* PDFTrailerElement::Lookup(const OString
& rDictionaryKey
)
2313 if (!m_pDictionaryElement
)
2315 PDFObjectParser
aParser(m_rDoc
.GetElements());
2316 aParser
.parse(this);
2318 if (!m_pDictionaryElement
)
2320 return m_pDictionaryElement
->LookupElement(rDictionaryKey
);
2323 sal_uInt64
PDFTrailerElement::GetLocation() const { return m_nOffset
; }
2325 double PDFNumberElement::GetValue() const { return m_fValue
; }
// Creates an object element with the given object and generation numbers.
// The element pointers shown below start as nullptr and the dictionary
// offset / length as 0; they are filled in later through the setters and by
// parsing.
2327 PDFObjectElement::PDFObjectElement(PDFDocument
& rDoc
, double fObjectValue
, double fGenerationValue
)
2329 , m_fObjectValue(fObjectValue
)
2330 , m_fGenerationValue(fGenerationValue
)
2331 , m_pNumberElement(nullptr)
2332 , m_pNameElement(nullptr)
2333 , m_nDictionaryOffset(0)
2334 , m_nDictionaryLength(0)
2335 , m_pDictionaryElement(nullptr)
2338 , m_pArrayElement(nullptr)
2339 , m_pStreamElement(nullptr)
// The stream parameter is unused: the object and generation numbers were
// already passed to the constructor. The visible part of the function only
// logs "<object> <generation> obj".
2344 bool PDFObjectElement::Read(SvStream
& /*rStream*/)
2346 SAL_INFO("vcl.filter",
2347 "PDFObjectElement::Read: " << m_fObjectValue
<< " " << m_fGenerationValue
<< " obj");
// Compiler-generated default constructor.
2351 PDFDictionaryElement::PDFDictionaryElement() = default;
// Looks up rKey in rDictionary, a map from key name to element. A key that
// is not present is handled by a separate branch from the regular result.
2353 PDFElement
* PDFDictionaryElement::Lookup(const std::map
<OString
, PDFElement
*>& rDictionary
,
2354 const OString
& rKey
)
2356 auto it
= rDictionary
.find(rKey
);
2357 if (it
== rDictionary
.end())
// Resolves the value of rDictionaryKey to the object it refers to: the value
// found in m_aItems has to be a PDFReferenceElement, whose LookupObject()
// result is returned. A key without a reference value is reported with
// SAL_WARN.
2363 PDFObjectElement
* PDFDictionaryElement::LookupObject(const OString
& rDictionaryKey
)
2365 auto pKey
= dynamic_cast<PDFReferenceElement
*>(
2366 PDFDictionaryElement::Lookup(m_aItems
, rDictionaryKey
));
2369 SAL_WARN("vcl.filter",
2370 "PDFDictionaryElement::LookupObject: no such key with reference value: "
2375 return pKey
->LookupObject();
2378 PDFElement
* PDFDictionaryElement::LookupElement(const OString
& rDictionaryKey
)
2380 return PDFDictionaryElement::Lookup(m_aItems
, rDictionaryKey
);
// Runs a PDFObjectParser for this object. The token source depends on where
// the object lives: an object stored inside an object stream carries its own
// m_aElements, a normal object is parsed from the document's elements.
2383 void PDFObjectElement::parseIfNecessary()
2388 if (!m_aElements
.empty())
2390 // This is a stored object in an object stream.
2391 PDFObjectParser
aParser(m_aElements
);
2392 aParser
.parse(this);
2396 // Normal object: elements are stored as members of the document itself.
2397 PDFObjectParser
aParser(m_rDoc
.GetElements());
2398 aParser
.parse(this);
// Looks up rDictionaryKey in the dictionary of this object. An object without
// a dictionary element is handled separately; otherwise the lookup goes
// through PDFDictionaryElement::Lookup() on GetDictionaryItems().
2403 PDFElement
* PDFObjectElement::Lookup(const OString
& rDictionaryKey
)
2406 if (!m_pDictionaryElement
)
2408 return PDFDictionaryElement::Lookup(GetDictionaryItems(), rDictionaryKey
);
// Resolves the value of rDictionaryKey to the object it refers to: the value
// returned by Lookup() has to be a PDFReferenceElement, whose LookupObject()
// result is returned. A key without a reference value is reported with
// SAL_WARN.
2411 PDFObjectElement
* PDFObjectElement::LookupObject(const OString
& rDictionaryKey
)
2413 auto pKey
= dynamic_cast<PDFReferenceElement
*>(Lookup(rDictionaryKey
));
2416 SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2421 return pKey
->LookupObject();
2424 double PDFObjectElement::GetObjectValue() const { return m_fObjectValue
; }
2426 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset
)
2428 m_nDictionaryOffset
= nDictionaryOffset
;
// Returns the dictionary offset stored via SetDictionaryOffset().
2431 sal_uInt64
PDFObjectElement::GetDictionaryOffset()
2434 return m_nDictionaryOffset
;
2437 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset
) { m_nArrayOffset
= nArrayOffset
; }
2439 sal_uInt64
PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset
; }
2441 void PDFDictionaryElement::SetKeyOffset(const OString
& rKey
, sal_uInt64 nOffset
)
2443 m_aDictionaryKeyOffset
[rKey
] = nOffset
;
2446 void PDFDictionaryElement::SetKeyValueLength(const OString
& rKey
, sal_uInt64 nLength
)
2448 m_aDictionaryKeyValueLength
[rKey
] = nLength
;
// Looks up the offset recorded for rKey via SetKeyOffset(). A key without a
// recorded offset is handled by a separate branch.
2451 sal_uInt64
PDFDictionaryElement::GetKeyOffset(const OString
& rKey
) const
2453 auto it
= m_aDictionaryKeyOffset
.find(rKey
);
2454 if (it
== m_aDictionaryKeyOffset
.end())
2460 sal_uInt64
PDFDictionaryElement::GetKeyValueLength(const OString
& rKey
) const
2462 auto it
= m_aDictionaryKeyValueLength
.find(rKey
);
2463 if (it
== m_aDictionaryKeyValueLength
.end())
2469 const std::map
<OString
, PDFElement
*>& PDFDictionaryElement::GetItems() const { return m_aItems
; }
2471 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength
)
2473 m_nDictionaryLength
= nDictionaryLength
;
2476 sal_uInt64
PDFObjectElement::GetDictionaryLength()
2479 return m_nDictionaryLength
;
2482 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength
) { m_nArrayLength
= nArrayLength
; }
2484 sal_uInt64
PDFObjectElement::GetArrayLength() const { return m_nArrayLength
; }
2486 PDFDictionaryElement
* PDFObjectElement::GetDictionary()
2489 return m_pDictionaryElement
;
2492 void PDFObjectElement::SetDictionary(PDFDictionaryElement
* pDictionaryElement
)
2494 m_pDictionaryElement
= pDictionaryElement
;
2497 void PDFObjectElement::SetNumberElement(PDFNumberElement
* pNumberElement
)
2499 m_pNumberElement
= pNumberElement
;
2502 PDFNumberElement
* PDFObjectElement::GetNumberElement() const { return m_pNumberElement
; }
2504 void PDFObjectElement::SetNameElement(PDFNameElement
* pNameElement
)
2506 m_pNameElement
= pNameElement
;
2509 PDFNameElement
* PDFObjectElement::GetNameElement() const { return m_pNameElement
; }
2511 const std::vector
<PDFReferenceElement
*>& PDFObjectElement::GetDictionaryReferences() const
2513 return m_aDictionaryReferences
;
2516 void PDFObjectElement::AddDictionaryReference(PDFReferenceElement
* pReference
)
2518 m_aDictionaryReferences
.push_back(pReference
);
2521 const std::map
<OString
, PDFElement
*>& PDFObjectElement::GetDictionaryItems()
2524 return m_pDictionaryElement
->GetItems();
2527 void PDFObjectElement::SetArray(PDFArrayElement
* pArrayElement
) { m_pArrayElement
= pArrayElement
; }
2529 void PDFObjectElement::SetStream(PDFStreamElement
* pStreamElement
)
2531 m_pStreamElement
= pStreamElement
;
2534 PDFStreamElement
* PDFObjectElement::GetStream() const { return m_pStreamElement
; }
2536 PDFArrayElement
* PDFObjectElement::GetArray()
2539 return m_pArrayElement
;
2542 void PDFObjectElement::ParseStoredObjects()
2544 if (!m_pStreamElement
)
2546 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2550 auto pType
= dynamic_cast<PDFNameElement
*>(Lookup("Type"_ostr
));
2551 if (!pType
|| pType
->GetValue() != "ObjStm")
2554 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2556 SAL_WARN("vcl.filter",
2557 "PDFDocument::ReadXRefStream: unexpected type: " << pType
->GetValue());
2561 auto pFilter
= dynamic_cast<PDFNameElement
*>(Lookup("Filter"_ostr
));
2562 if (!pFilter
|| pFilter
->GetValue() != "FlateDecode")
2565 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2567 SAL_WARN("vcl.filter",
2568 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter
->GetValue());
2572 auto pFirst
= dynamic_cast<PDFNumberElement
*>(Lookup("First"_ostr
));
2575 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2579 auto pN
= dynamic_cast<PDFNumberElement
*>(Lookup("N"_ostr
));
2582 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2585 size_t nN
= pN
->GetValue();
2587 auto pLength
= dynamic_cast<PDFNumberElement
*>(Lookup("Length"_ostr
));
2590 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2593 size_t nLength
= pLength
->GetValue();
2595 // Read and decompress it.
2596 SvMemoryStream
& rEditBuffer
= m_rDoc
.GetEditBuffer();
2597 rEditBuffer
.Seek(m_pStreamElement
->GetOffset());
2598 std::vector
<char> aBuf(nLength
);
2599 rEditBuffer
.ReadBytes(aBuf
.data(), aBuf
.size());
2600 SvMemoryStream
aSource(aBuf
.data(), aBuf
.size(), StreamMode::READ
);
2601 SvMemoryStream aStream
;
2603 aZCodec
.BeginCompression();
2604 aZCodec
.Decompress(aSource
, aStream
);
2605 if (!aZCodec
.EndCompression())
2607 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2611 nLength
= aStream
.TellEnd();
2613 std::vector
<size_t> aObjNums
;
2614 std::vector
<size_t> aOffsets
;
2615 std::vector
<size_t> aLengths
;
2616 // First iterate over and find out the lengths.
2617 for (size_t nObject
= 0; nObject
< nN
; ++nObject
)
2619 PDFNumberElement aObjNum
;
2620 if (!aObjNum
.Read(aStream
))
2622 SAL_WARN("vcl.filter",
2623 "PDFObjectElement::ParseStoredObjects: failed to read object number");
2626 aObjNums
.push_back(aObjNum
.GetValue());
2628 PDFDocument::SkipWhitespace(aStream
);
2630 PDFNumberElement aByteOffset
;
2631 if (!aByteOffset
.Read(aStream
))
2633 SAL_WARN("vcl.filter",
2634 "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2637 aOffsets
.push_back(pFirst
->GetValue() + aByteOffset
.GetValue());
2639 if (aOffsets
.size() > 1)
2640 aLengths
.push_back(aOffsets
.back() - aOffsets
[aOffsets
.size() - 2]);
2641 if (nObject
+ 1 == nN
)
2642 aLengths
.push_back(nLength
- aOffsets
.back());
2644 PDFDocument::SkipWhitespace(aStream
);
2647 // Now create streams with the proper length and tokenize the data.
2648 for (size_t nObject
= 0; nObject
< nN
; ++nObject
)
2650 size_t nObjNum
= aObjNums
[nObject
];
2651 size_t nOffset
= aOffsets
[nObject
];
2652 size_t nLen
= aLengths
[nObject
];
2654 aStream
.Seek(nOffset
);
2655 m_aStoredElements
.push_back(std::make_unique
<PDFObjectElement
>(m_rDoc
, nObjNum
, 0));
2656 PDFObjectElement
* pStored
= m_aStoredElements
.back().get();
2660 aStream
.ReadBytes(aBuf
.data(), aBuf
.size());
2661 SvMemoryStream
aStoredStream(aBuf
.data(), aBuf
.size(), StreamMode::READ
);
2663 m_rDoc
.Tokenize(aStoredStream
, TokenizeMode::STORED_OBJECT
, pStored
->GetStoredElements(),
2665 // This is how references know the object is stored inside this object stream.
2666 m_rDoc
.SetIDObject(nObjNum
, pStored
);
2668 // Store the stream of the object in the object stream for later use.
2669 std::unique_ptr
<SvMemoryStream
> pStreamBuffer(new SvMemoryStream());
2670 aStoredStream
.Seek(0);
2671 pStreamBuffer
->WriteStream(aStoredStream
);
2672 pStored
->SetStreamBuffer(pStreamBuffer
);
2676 std::vector
<std::unique_ptr
<PDFElement
>>& PDFObjectElement::GetStoredElements()
2681 SvMemoryStream
* PDFObjectElement::GetStreamBuffer() const { return m_pStreamBuffer
.get(); }
2683 void PDFObjectElement::SetStreamBuffer(std::unique_ptr
<SvMemoryStream
>& pStreamBuffer
)
2685 m_pStreamBuffer
= std::move(pStreamBuffer
);
2688 PDFDocument
& PDFObjectElement::GetDocument() { return m_rDoc
; }
2690 PDFReferenceElement::PDFReferenceElement(PDFDocument
& rDoc
, PDFNumberElement
& rObject
,
2691 PDFNumberElement
const& rGeneration
)
2693 , m_fObjectValue(rObject
.GetValue())
2694 , m_fGenerationValue(rGeneration
.GetValue())
2695 , m_rObject(rObject
)
2699 PDFNumberElement
& PDFReferenceElement::GetObjectElement() const { return m_rObject
; }
2701 bool PDFReferenceElement::Read(SvStream
& rStream
)
2703 SAL_INFO("vcl.filter",
2704 "PDFReferenceElement::Read: " << m_fObjectValue
<< " " << m_fGenerationValue
<< " R");
2705 m_nOffset
= rStream
.Tell();
2709 sal_uInt64
PDFReferenceElement::GetOffset() const { return m_nOffset
; }
2711 double PDFReferenceElement::LookupNumber(SvStream
& rStream
) const
2713 size_t nOffset
= m_rDoc
.GetObjectOffset(m_fObjectValue
);
2716 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2721 sal_uInt64 nOrigPos
= rStream
.Tell();
2722 comphelper::ScopeGuard
g([&]() { rStream
.Seek(nOrigPos
); });
2724 rStream
.Seek(nOffset
);
2726 PDFDocument::SkipWhitespace(rStream
);
2727 PDFNumberElement aNumber
;
2728 bool bRet
= aNumber
.Read(rStream
);
2729 if (!bRet
|| aNumber
.GetValue() != m_fObjectValue
)
2731 SAL_WARN("vcl.filter",
2732 "PDFReferenceElement::LookupNumber: offset points to not matching object");
2738 PDFDocument::SkipWhitespace(rStream
);
2739 PDFNumberElement aNumber
;
2740 bool bRet
= aNumber
.Read(rStream
);
2741 if (!bRet
|| aNumber
.GetValue() != m_fGenerationValue
)
2743 SAL_WARN("vcl.filter",
2744 "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2750 PDFDocument::SkipWhitespace(rStream
);
2751 OString aKeyword
= PDFDocument::ReadKeyword(rStream
);
2752 if (aKeyword
!= "obj")
2754 SAL_WARN("vcl.filter",
2755 "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2760 PDFDocument::SkipWhitespace(rStream
);
2761 PDFNumberElement aNumber
;
2762 if (!aNumber
.Read(rStream
))
2764 SAL_WARN("vcl.filter",
2765 "PDFReferenceElement::LookupNumber: failed to read referenced number");
2769 return aNumber
.GetValue();
2772 PDFObjectElement
* PDFReferenceElement::LookupObject()
2774 return m_rDoc
.LookupObject(m_fObjectValue
);
2777 PDFObjectElement
* PDFDocument::LookupObject(size_t nObjectNumber
)
2779 auto itIDObjects
= m_aIDObjects
.find(nObjectNumber
);
2781 if (itIDObjects
!= m_aIDObjects
.end())
2782 return itIDObjects
->second
;
2784 SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber
);
2788 SvMemoryStream
& PDFDocument::GetEditBuffer() { return m_aEditBuffer
; }
2790 int PDFReferenceElement::GetObjectValue() const { return m_fObjectValue
; }
2792 int PDFReferenceElement::GetGenerationValue() const { return m_fGenerationValue
; }
2794 bool PDFDictionaryElement::Read(SvStream
& rStream
)
2797 rStream
.ReadChar(ch
);
2800 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch
);
2806 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2810 rStream
.ReadChar(ch
);
2813 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch
);
2817 m_nLocation
= rStream
.Tell();
2819 SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
/// Compiler-generated default constructor, defined out of line.
PDFEndDictionaryElement::PDFEndDictionaryElement() = default;
2826 sal_uInt64
PDFEndDictionaryElement::GetLocation() const { return m_nLocation
; }
2828 bool PDFEndDictionaryElement::Read(SvStream
& rStream
)
2830 m_nLocation
= rStream
.Tell();
2832 rStream
.ReadChar(ch
);
2835 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch
);
2841 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2845 rStream
.ReadChar(ch
);
2848 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch
);
2852 SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
/// Compiler-generated default constructor, defined out of line.
PDFNameElement::PDFNameElement() = default;
2859 bool PDFNameElement::Read(SvStream
& rStream
)
2862 rStream
.ReadChar(ch
);
2865 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch
);
2868 m_nLocation
= rStream
.Tell();
2872 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2876 // Read till the first white-space.
2878 rStream
.ReadChar(ch
);
2879 while (!rStream
.eof())
2881 if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch
)) || ch
== '/' || ch
== '['
2882 || ch
== ']' || ch
== '<' || ch
== '>' || ch
== '(')
2884 rStream
.SeekRel(-1);
2885 m_aValue
= aBuf
.makeStringAndClear();
2886 SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue
<< "'");
2890 rStream
.ReadChar(ch
);
2896 const OString
& PDFNameElement::GetValue() const { return m_aValue
; }
2898 sal_uInt64
PDFNameElement::GetLocation() const { return m_nLocation
; }
2900 PDFStreamElement::PDFStreamElement(size_t nLength
)
2901 : m_nLength(nLength
)
2906 bool PDFStreamElement::Read(SvStream
& rStream
)
2908 SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength
);
2909 m_nOffset
= rStream
.Tell();
2910 std::vector
<unsigned char> aBytes(m_nLength
);
2911 rStream
.ReadBytes(aBytes
.data(), aBytes
.size());
2912 m_aMemory
.WriteBytes(aBytes
.data(), aBytes
.size());
2914 return rStream
.good();
2917 SvMemoryStream
& PDFStreamElement::GetMemory() { return m_aMemory
; }
2919 sal_uInt64
PDFStreamElement::GetOffset() const { return m_nOffset
; }
2921 bool PDFEndStreamElement::Read(SvStream
& /*rStream*/) { return true; }
2923 bool PDFEndObjectElement::Read(SvStream
& /*rStream*/) { return true; }
2925 PDFArrayElement::PDFArrayElement(PDFObjectElement
* pObject
)
2926 : m_pObject(pObject
)
2930 bool PDFArrayElement::Read(SvStream
& rStream
)
2933 rStream
.ReadChar(ch
);
2936 SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch
);
2940 SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2945 void PDFArrayElement::PushBack(PDFElement
* pElement
)
2948 SAL_INFO("vcl.filter",
2949 "PDFArrayElement::PushBack: object is " << m_pObject
->GetObjectValue());
2950 m_aElements
.push_back(pElement
);
2953 const std::vector
<PDFElement
*>& PDFArrayElement::GetElements() const { return m_aElements
; }
/// Compiler-generated default constructor, defined out of line.
PDFEndArrayElement::PDFEndArrayElement() = default;
2957 bool PDFEndArrayElement::Read(SvStream
& rStream
)
2959 m_nOffset
= rStream
.Tell();
2961 rStream
.ReadChar(ch
);
2964 SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch
);
2968 SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
2973 sal_uInt64
PDFEndArrayElement::GetOffset() const { return m_nOffset
; }
2977 size_t PDFObjectParser::parse(PDFElement
* pParsingElement
, size_t nStartIndex
, int nCurrentDepth
)
2979 // The index of last parsed element
2980 size_t nReturnIndex
= 0;
2982 pParsingElement
->setParsing(true);
2984 comphelper::ScopeGuard
aGuard([pParsingElement
]() { pParsingElement
->setParsing(false); });
2986 // Current object, if root is an object, else nullptr
2987 auto pParsingObject
= dynamic_cast<PDFObjectElement
*>(pParsingElement
);
2988 auto pParsingTrailer
= dynamic_cast<PDFTrailerElement
*>(pParsingElement
);
2990 // Current dictionary, if root is an dictionary, else nullptr
2991 auto pParsingDictionary
= dynamic_cast<PDFDictionaryElement
*>(pParsingElement
);
2993 // Current parsing array, if root is an array, else nullptr
2994 auto pParsingArray
= dynamic_cast<PDFArrayElement
*>(pParsingElement
);
2996 // Find out where the dictionary for this object starts.
2997 size_t nIndex
= nStartIndex
;
2998 for (size_t i
= nStartIndex
; i
< mrElements
.size(); ++i
)
3000 if (mrElements
[i
].get() == pParsingElement
)
3008 sal_uInt64 nNameOffset
= 0;
3009 std::vector
<PDFNumberElement
*> aNumbers
;
3011 sal_uInt64 nDictionaryOffset
= 0;
3013 // Current depth; 1 is current
3016 for (size_t i
= nIndex
; i
< mrElements
.size(); ++i
)
3018 auto* pCurrentElement
= mrElements
[i
].get();
3020 // Dictionary tokens can be nested, track enter/leave.
3021 if (auto pCurrentDictionary
= dynamic_cast<PDFDictionaryElement
*>(pCurrentElement
))
3023 // Handle previously stored number
3024 if (!aNumbers
.empty())
3026 if (pParsingDictionary
)
3028 PDFNumberElement
* pNumber
= aNumbers
.back();
3030 = pNumber
->GetLocation() + pNumber
->GetLength() - nNameOffset
;
3032 pParsingDictionary
->insert(aName
, pNumber
);
3033 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3034 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3036 else if (pParsingArray
)
3038 for (auto& pNumber
: aNumbers
)
3039 pParsingArray
->PushBack(pNumber
);
3043 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3051 if (nDepth
== 1) // pParsingDictionary is the current one
3053 // First dictionary start, track start offset.
3054 nDictionaryOffset
= pCurrentDictionary
->GetLocation();
3058 // Then the toplevel dictionary of the object.
3059 pParsingObject
->SetDictionary(pCurrentDictionary
);
3060 pParsingObject
->SetDictionaryOffset(nDictionaryOffset
);
3061 pParsingDictionary
= pCurrentDictionary
;
3063 else if (pParsingTrailer
)
3065 pParsingTrailer
->SetDictionary(pCurrentDictionary
);
3066 pParsingDictionary
= pCurrentDictionary
;
3069 else if (!pCurrentDictionary
->alreadyParsing())
3073 pParsingArray
->PushBack(pCurrentDictionary
);
3075 else if (pParsingDictionary
)
3077 // Dictionary toplevel value.
3078 pParsingDictionary
->insert(aName
, pCurrentDictionary
);
3082 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3084 // Nested dictionary.
3085 const size_t nNextElementIndex
= parse(pCurrentDictionary
, i
, nCurrentDepth
+ 1);
3086 i
= std::max(i
, nNextElementIndex
- 1);
3089 else if (auto pCurrentEndDictionary
3090 = dynamic_cast<PDFEndDictionaryElement
*>(pCurrentElement
))
3092 // Handle previously stored number
3093 if (!aNumbers
.empty())
3095 if (pParsingDictionary
)
3097 PDFNumberElement
* pNumber
= aNumbers
.back();
3099 = pNumber
->GetLocation() + pNumber
->GetLength() - nNameOffset
;
3101 pParsingDictionary
->insert(aName
, pNumber
);
3102 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3103 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3105 else if (pParsingArray
)
3107 for (auto& pNumber
: aNumbers
)
3108 pParsingArray
->PushBack(pNumber
);
3112 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3118 if (pParsingDictionary
)
3120 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3121 sal_uInt64 nLength
= pCurrentEndDictionary
->GetLocation() - nNameOffset
+ 2;
3122 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3126 if (nDepth
== 1) // did the parsing ended
3128 // Last dictionary end, track length and stop parsing.
3131 sal_uInt64 nDictionaryLength
3132 = pCurrentEndDictionary
->GetLocation() - nDictionaryOffset
;
3133 pParsingObject
->SetDictionaryLength(nDictionaryLength
);
3141 else if (auto pCurrentArray
= dynamic_cast<PDFArrayElement
*>(pCurrentElement
))
3143 // Handle previously stored number
3144 if (!aNumbers
.empty())
3146 if (pParsingDictionary
)
3148 PDFNumberElement
* pNumber
= aNumbers
.back();
3151 = pNumber
->GetLocation() + pNumber
->GetLength() - nNameOffset
;
3152 pParsingDictionary
->insert(aName
, pNumber
);
3153 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3154 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3156 else if (pParsingArray
)
3158 for (auto& pNumber
: aNumbers
)
3159 pParsingArray
->PushBack(pNumber
);
3163 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3170 if (nDepth
== 1) // pParsingDictionary is the current one
3174 pParsingObject
->SetArray(pCurrentArray
);
3175 pParsingArray
= pCurrentArray
;
3178 else if (!pCurrentArray
->alreadyParsing())
3182 // Array is toplevel
3183 pParsingArray
->PushBack(pCurrentArray
);
3185 else if (pParsingDictionary
)
3187 // Dictionary toplevel value.
3188 pParsingDictionary
->insert(aName
, pCurrentArray
);
3191 const size_t nNextElementIndex
= parse(pCurrentArray
, i
, nCurrentDepth
+ 1);
3193 // ensure we go forwards and not endlessly loop
3194 i
= std::max(i
, nNextElementIndex
- 1);
3197 else if (auto pCurrentEndArray
= dynamic_cast<PDFEndArrayElement
*>(pCurrentElement
))
3199 // Handle previously stored number
3200 if (!aNumbers
.empty())
3202 if (pParsingDictionary
)
3204 PDFNumberElement
* pNumber
= aNumbers
.back();
3207 = pNumber
->GetLocation() + pNumber
->GetLength() - nNameOffset
;
3208 pParsingDictionary
->insert(aName
, pNumber
);
3209 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3210 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3212 else if (pParsingArray
)
3214 for (auto& pNumber
: aNumbers
)
3215 pParsingArray
->PushBack(pNumber
);
3219 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3225 if (nDepth
== 1) // did the pParsing ended
3227 // Last array end, track length and stop parsing.
3232 if (pParsingDictionary
)
3234 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3235 // Include the ending ']' in the length of the key - (array)value pair length.
3236 sal_uInt64 nLength
= pCurrentEndArray
->GetOffset() - nNameOffset
+ 1;
3237 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3242 else if (auto pCurrentName
= dynamic_cast<PDFNameElement
*>(pCurrentElement
))
3244 // Handle previously stored number
3245 if (!aNumbers
.empty())
3247 if (pParsingDictionary
)
3249 PDFNumberElement
* pNumber
= aNumbers
.back();
3252 = pNumber
->GetLocation() + pNumber
->GetLength() - nNameOffset
;
3253 pParsingDictionary
->insert(aName
, pNumber
);
3254 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3255 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3257 else if (pParsingArray
)
3259 for (auto& pNumber
: aNumbers
)
3260 pParsingArray
->PushBack(pNumber
);
3269 // if we are in an array, just push the name to array
3270 pParsingArray
->PushBack(pCurrentName
);
3272 else if (pParsingDictionary
)
3274 // if we are in a dictionary, we need to store the name as a possible key
3275 if (aName
.isEmpty())
3277 aName
= pCurrentName
->GetValue();
3278 nNameOffset
= pCurrentName
->GetLocation();
3282 sal_uInt64 nKeyLength
3283 = pCurrentName
->GetLocation() + pCurrentName
->GetLength() - nNameOffset
;
3284 pParsingDictionary
->insert(aName
, pCurrentName
);
3285 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3286 pParsingDictionary
->SetKeyValueLength(aName
, nKeyLength
);
3291 else if (auto pReference
= dynamic_cast<PDFReferenceElement
*>(pCurrentElement
))
3293 // Handle previously stored number
3294 if (aNumbers
.size() > 2)
3296 aNumbers
.resize(aNumbers
.size() - 2);
3299 for (auto& pNumber
: aNumbers
)
3300 pParsingArray
->PushBack(pNumber
);
3307 pParsingArray
->PushBack(pReference
);
3309 else if (pParsingDictionary
)
3311 sal_uInt64 nLength
= pReference
->GetOffset() - nNameOffset
;
3312 pParsingDictionary
->insert(aName
, pReference
);
3313 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3314 pParsingDictionary
->SetKeyValueLength(aName
, nLength
);
3319 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3323 else if (auto pLiteralString
= dynamic_cast<PDFLiteralStringElement
*>(pCurrentElement
))
3325 // Handle previously stored number
3326 if (!aNumbers
.empty())
3330 for (auto& pNumber
: aNumbers
)
3331 pParsingArray
->PushBack(pNumber
);
3338 pParsingArray
->PushBack(pLiteralString
);
3340 else if (pParsingDictionary
)
3342 pParsingDictionary
->insert(aName
, pLiteralString
);
3343 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3348 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3351 else if (auto pBoolean
= dynamic_cast<PDFBooleanElement
*>(pCurrentElement
))
3353 // Handle previously stored number
3354 if (!aNumbers
.empty())
3358 for (auto& pNumber
: aNumbers
)
3359 pParsingArray
->PushBack(pNumber
);
3366 pParsingArray
->PushBack(pBoolean
);
3368 else if (pParsingDictionary
)
3370 pParsingDictionary
->insert(aName
, pBoolean
);
3371 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3376 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3379 else if (auto pHexString
= dynamic_cast<PDFHexStringElement
*>(pCurrentElement
))
3381 // Handle previously stored number
3382 if (!aNumbers
.empty())
3386 for (auto& pNumber
: aNumbers
)
3387 pParsingArray
->PushBack(pNumber
);
3394 pParsingArray
->PushBack(pHexString
);
3396 else if (pParsingDictionary
)
3398 pParsingDictionary
->insert(aName
, pHexString
);
3399 pParsingDictionary
->SetKeyOffset(aName
, nNameOffset
);
3403 else if (auto pNumberElement
= dynamic_cast<PDFNumberElement
*>(pCurrentElement
))
3405 // Just remember this, so that in case it's not a reference parameter,
3406 // we can handle it later.
3407 aNumbers
.push_back(pNumberElement
);
3409 else if (dynamic_cast<PDFEndObjectElement
*>(pCurrentElement
))
3411 // parsing of the object is finished
3414 else if (dynamic_cast<PDFObjectElement
*>(pCurrentElement
)
3415 || dynamic_cast<PDFTrailerElement
*>(pCurrentElement
))
3421 SAL_INFO("vcl.filter", "Unhandled element while parsing.");
3425 return nReturnIndex
;
3430 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */