tdf#154285 Check upper bound of arguments in SbRtl_Minute function
[LibreOffice.git] / vcl / source / filter / ipdf / pdfdocument.cxx
blob0b7c01a56ac15f9cefec595dd7cd8be3b08d003a
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <vcl/filter/pdfdocument.hxx>
11 #include <pdf/pdfcompat.hxx>
13 #include <map>
14 #include <memory>
15 #include <vector>
17 #include <com/sun/star/uno/Sequence.hxx>
18 #include <com/sun/star/security/XCertificate.hpp>
20 #include <comphelper/scopeguard.hxx>
21 #include <comphelper/string.hxx>
22 #include <o3tl/string_view.hxx>
23 #include <rtl/character.hxx>
24 #include <rtl/strbuf.hxx>
25 #include <rtl/string.hxx>
26 #include <sal/log.hxx>
27 #include <sal/types.h>
28 #include <svl/cryptosign.hxx>
29 #include <tools/zcodec.hxx>
30 #include <vcl/pdfwriter.hxx>
31 #include <o3tl/safeint.hxx>
33 #include <pdf/objectcopier.hxx>
34 #include <pdf/COSWriter.hxx>
36 using namespace com::sun::star;
38 namespace vcl::filter
40 XRefEntry::XRefEntry() = default;
42 PDFDocument::PDFDocument() = default;
44 PDFDocument::~PDFDocument() = default;
46 bool PDFDocument::RemoveSignature(size_t nPosition)
48 std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
49 if (nPosition >= aSignatures.size())
51 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
52 return false;
55 if (aSignatures.size() != m_aEOFs.size() - 1)
57 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
58 "and incremental updates");
59 return false;
62 // The EOF offset is the end of the original file, without the signature at
63 // nPosition.
64 m_aEditBuffer.Seek(m_aEOFs[nPosition]);
65 // Drop all bytes after the current position.
66 m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1);
68 return m_aEditBuffer.good();
71 sal_Int32 PDFDocument::createObject()
73 sal_Int32 nObject = m_aXRef.size();
74 m_aXRef[nObject] = XRefEntry();
75 return nObject;
78 bool PDFDocument::updateObject(sal_Int32 nObject)
80 if (o3tl::make_unsigned(nObject) >= m_aXRef.size())
82 SAL_WARN("vcl.filter", "PDFDocument::updateObject: invalid nObject");
83 return false;
86 XRefEntry aEntry;
87 aEntry.SetOffset(m_aEditBuffer.Tell());
88 aEntry.SetDirty(true);
89 m_aXRef[nObject] = aEntry;
90 return true;
93 bool PDFDocument::writeBufferBytes(const void* pBuffer, sal_uInt64 nBytes)
95 std::size_t nWritten = m_aEditBuffer.WriteBytes(pBuffer, nBytes);
96 return nWritten == nBytes;
99 void PDFDocument::SetSignatureLine(std::vector<sal_Int8>&& rSignatureLine)
101 m_aSignatureLine = std::move(rSignatureLine);
104 void PDFDocument::SetSignaturePage(size_t nPage) { m_nSignaturePage = nPage; }
106 sal_uInt32 PDFDocument::GetNextSignature()
108 sal_uInt32 nRet = 0;
109 for (const auto& pSignature : GetSignatureWidgets())
111 auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"_ostr));
112 if (!pT)
113 continue;
115 const OString& rValue = pT->GetValue();
116 std::string_view rest;
117 if (!rValue.startsWith("Signature", &rest))
118 continue;
120 nRet = std::max(nRet, o3tl::toUInt32(rest));
123 return nRet + 1;
126 sal_Int32 PDFDocument::WriteSignatureObject(svl::crypto::SigningContext& rSigningContext,
127 const OUString& rDescription, bool bAdES,
128 sal_uInt64& rLastByteRangeOffset,
129 sal_Int64& rContentOffset)
131 // Write signature object.
132 sal_Int32 nSignatureId = m_aXRef.size();
133 XRefEntry aSignatureEntry;
134 aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
135 aSignatureEntry.SetDirty(true);
136 m_aXRef[nSignatureId] = aSignatureEntry;
138 OStringBuffer aSigBuffer(OString::number(nSignatureId)
139 + " 0 obj\n"
140 "<</Contents <");
141 rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
142 // Reserve space for the PKCS#7 object.
143 OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
144 comphelper::string::padToLength(aContentFiller, MAX_SIGNATURE_CONTENT_LENGTH, '0');
145 aSigBuffer.append(aContentFiller + ">\n/Type/Sig/SubFilter");
146 if (bAdES)
147 aSigBuffer.append("/ETSI.CAdES.detached");
148 else
149 aSigBuffer.append("/adbe.pkcs7.detached");
151 // Time of signing.
152 aSigBuffer.append(" /M (" + vcl::PDFWriter::GetDateTime(&rSigningContext)
153 + ")"
155 // Byte range: we can write offset1-length1 and offset2 right now, will
156 // write length2 later.
157 " /ByteRange [ 0 "
158 // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
159 + OString::number(rContentOffset - 1) + " "
160 + OString::number(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1) + " ");
161 rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
162 // We don't know how many bytes we need for the last ByteRange value, this
163 // should be enough.
164 OStringBuffer aByteRangeFiller;
165 comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
166 aSigBuffer.append(aByteRangeFiller
167 // Finish the Sig obj.
168 + " /Filter/Adobe.PPKMS");
170 if (!rDescription.isEmpty())
172 pdf::COSWriter aWriter;
173 aWriter.writeKeyAndUnicode("/Reason", rDescription);
174 aSigBuffer.append(aWriter.getLine());
177 aSigBuffer.append(" >>\nendobj\n\n");
178 m_aEditBuffer.WriteOString(aSigBuffer);
180 return nSignatureId;
183 sal_Int32 PDFDocument::WriteAppearanceObject(tools::Rectangle& rSignatureRectangle)
185 PDFDocument aPDFDocument;
186 filter::PDFObjectElement* pPage = nullptr;
187 std::vector<filter::PDFObjectElement*> aContentStreams;
189 if (!m_aSignatureLine.empty())
191 // Parse the PDF data of signature line: we can set the signature rectangle to non-empty
192 // based on it.
193 SvMemoryStream aPDFStream;
194 aPDFStream.WriteBytes(m_aSignatureLine.data(), m_aSignatureLine.size());
195 aPDFStream.Seek(0);
196 if (!aPDFDocument.Read(aPDFStream))
198 SAL_WARN("vcl.filter",
199 "PDFDocument::WriteAppearanceObject: failed to read the PDF document");
200 return -1;
203 std::vector<filter::PDFObjectElement*> aPages = aPDFDocument.GetPages();
204 if (aPages.empty())
206 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no pages");
207 return -1;
210 pPage = aPages[0];
211 if (!pPage)
213 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no page");
214 return -1;
217 // Calculate the bounding box.
218 PDFElement* pMediaBox = pPage->Lookup("MediaBox"_ostr);
219 auto pMediaBoxArray = dynamic_cast<PDFArrayElement*>(pMediaBox);
220 if (!pMediaBoxArray || pMediaBoxArray->GetElements().size() < 4)
222 SAL_WARN("vcl.filter",
223 "PDFDocument::WriteAppearanceObject: MediaBox is not an array of 4");
224 return -1;
226 const std::vector<PDFElement*>& rMediaBoxElements = pMediaBoxArray->GetElements();
227 auto pWidth = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[2]);
228 if (!pWidth)
230 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no width");
231 return -1;
233 rSignatureRectangle.setWidth(pWidth->GetValue());
234 auto pHeight = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[3]);
235 if (!pHeight)
237 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no height");
238 return -1;
240 rSignatureRectangle.setHeight(pHeight->GetValue());
242 if (PDFObjectElement* pContentStream = pPage->LookupObject("Contents"_ostr))
244 aContentStreams.push_back(pContentStream);
247 if (aContentStreams.empty())
249 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no content stream");
250 return -1;
253 m_aSignatureLine.clear();
255 // Write appearance object: allocate an ID.
256 sal_Int32 nAppearanceId = m_aXRef.size();
257 m_aXRef[nAppearanceId] = XRefEntry();
259 // Write the object content.
260 SvMemoryStream aEditBuffer;
261 aEditBuffer.WriteNumberAsString(nAppearanceId);
262 aEditBuffer.WriteOString(" 0 obj\n");
263 aEditBuffer.WriteOString("<</Type/XObject\n/Subtype/Form\n");
265 PDFObjectCopier aCopier(*this);
266 if (!aContentStreams.empty())
268 assert(pPage && "aContentStreams is only filled if there was a pPage");
269 OStringBuffer aBuffer;
270 aCopier.copyPageResources(pPage, aBuffer);
271 aEditBuffer.WriteOString(aBuffer);
274 aEditBuffer.WriteOString("/BBox[0 0 ");
275 aEditBuffer.WriteNumberAsString(rSignatureRectangle.getOpenWidth());
276 aEditBuffer.WriteOString(" ");
277 aEditBuffer.WriteNumberAsString(rSignatureRectangle.getOpenHeight());
278 aEditBuffer.WriteOString("]\n/Length ");
280 // Add the object to the doc-level edit buffer and update the offset.
281 SvMemoryStream aStream;
282 bool bCompressed = false;
283 sal_Int32 nLength = 0;
284 if (!aContentStreams.empty())
286 nLength = PDFObjectCopier::copyPageStreams(aContentStreams, aStream, bCompressed);
288 aEditBuffer.WriteNumberAsString(nLength);
289 if (bCompressed)
291 aEditBuffer.WriteOString(" /Filter/FlateDecode");
294 aEditBuffer.WriteOString("\n>>\n");
296 aEditBuffer.WriteOString("stream\n");
298 // Copy the original page streams to the form XObject stream.
299 aStream.Seek(0);
300 aEditBuffer.WriteStream(aStream);
302 aEditBuffer.WriteOString("\nendstream\nendobj\n\n");
304 aEditBuffer.Seek(0);
305 XRefEntry aAppearanceEntry;
306 aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
307 aAppearanceEntry.SetDirty(true);
308 m_aXRef[nAppearanceId] = aAppearanceEntry;
309 m_aEditBuffer.WriteStream(aEditBuffer);
311 return nAppearanceId;
314 sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
315 sal_Int32 nAppearanceId,
316 const tools::Rectangle& rSignatureRectangle)
318 // Decide what identifier to use for the new signature.
319 sal_uInt32 nNextSignature = GetNextSignature();
321 // Write the Annot object, references nSignatureId and nAppearanceId.
322 sal_Int32 nAnnotId = m_aXRef.size();
323 XRefEntry aAnnotEntry;
324 aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
325 aAnnotEntry.SetDirty(true);
326 m_aXRef[nAnnotId] = aAnnotEntry;
327 m_aEditBuffer.WriteNumberAsString(nAnnotId);
328 m_aEditBuffer.WriteOString(" 0 obj\n");
329 m_aEditBuffer.WriteOString("<</Type/Annot/Subtype/Widget/F 132\n");
330 m_aEditBuffer.WriteOString("/Rect[0 0 ");
331 m_aEditBuffer.WriteNumberAsString(rSignatureRectangle.getOpenWidth());
332 m_aEditBuffer.WriteOString(" ");
333 m_aEditBuffer.WriteNumberAsString(rSignatureRectangle.getOpenHeight());
334 m_aEditBuffer.WriteOString("]\n");
335 m_aEditBuffer.WriteOString("/FT/Sig\n");
336 m_aEditBuffer.WriteOString("/P ");
337 m_aEditBuffer.WriteNumberAsString(rFirstPage.GetObjectValue());
338 m_aEditBuffer.WriteOString(" 0 R\n");
339 m_aEditBuffer.WriteOString("/T(Signature");
340 m_aEditBuffer.WriteNumberAsString(nNextSignature);
341 m_aEditBuffer.WriteOString(")\n");
342 m_aEditBuffer.WriteOString("/V ");
343 m_aEditBuffer.WriteNumberAsString(nSignatureId);
344 m_aEditBuffer.WriteOString(" 0 R\n");
345 m_aEditBuffer.WriteOString("/DV ");
346 m_aEditBuffer.WriteNumberAsString(nSignatureId);
347 m_aEditBuffer.WriteOString(" 0 R\n");
348 m_aEditBuffer.WriteOString("/AP<<\n/N ");
349 m_aEditBuffer.WriteNumberAsString(nAppearanceId);
350 m_aEditBuffer.WriteOString(" 0 R\n>>\n");
351 m_aEditBuffer.WriteOString(">>\nendobj\n\n");
353 return nAnnotId;
356 bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
358 PDFElement* pAnnots = rFirstPage.Lookup("Annots"_ostr);
359 auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
360 if (pAnnotsReference)
362 // Write the updated Annots key of the Page object.
363 PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
364 if (!pAnnotsObject)
366 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
367 return false;
370 sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
371 m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
372 m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
373 m_aXRef[nAnnotsId].SetDirty(true);
374 m_aEditBuffer.WriteNumberAsString(nAnnotsId);
375 m_aEditBuffer.WriteOString(" 0 obj\n[");
377 // Write existing references.
378 PDFArrayElement* pArray = pAnnotsObject->GetArray();
379 if (!pArray)
381 SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
382 return false;
385 for (size_t i = 0; i < pArray->GetElements().size(); ++i)
387 auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
388 if (!pReference)
389 continue;
391 if (i)
392 m_aEditBuffer.WriteOString(" ");
393 m_aEditBuffer.WriteNumberAsString(pReference->GetObjectValue());
394 m_aEditBuffer.WriteOString(" 0 R");
396 // Write our reference.
397 m_aEditBuffer.WriteOString(" ");
398 m_aEditBuffer.WriteNumberAsString(nAnnotId);
399 m_aEditBuffer.WriteOString(" 0 R");
401 m_aEditBuffer.WriteOString("]\nendobj\n\n");
403 else
405 // Write the updated first page object, references nAnnotId.
406 sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
407 if (nFirstPageId >= m_aXRef.size())
409 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
410 return false;
412 m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
413 m_aXRef[nFirstPageId].SetDirty(true);
414 m_aEditBuffer.WriteNumberAsString(nFirstPageId);
415 m_aEditBuffer.WriteOString(" 0 obj\n");
416 m_aEditBuffer.WriteOString("<<");
417 auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
418 if (!pAnnotsArray)
420 // No Annots key, just write the key with a single reference.
421 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
422 + rFirstPage.GetDictionaryOffset(),
423 rFirstPage.GetDictionaryLength());
424 m_aEditBuffer.WriteOString("/Annots[");
425 m_aEditBuffer.WriteNumberAsString(nAnnotId);
426 m_aEditBuffer.WriteOString(" 0 R]");
428 else
430 // Annots key is already there, insert our reference at the end.
431 PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
433 // Offset right before the end of the Annots array.
434 sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots"_ostr)
435 + pDictionary->GetKeyValueLength("Annots"_ostr) - 1;
436 // Length of beginning of the dictionary -> Annots end.
437 sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
438 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
439 + rFirstPage.GetDictionaryOffset(),
440 nAnnotsBeforeEndLength);
441 m_aEditBuffer.WriteOString(" ");
442 m_aEditBuffer.WriteNumberAsString(nAnnotId);
443 m_aEditBuffer.WriteOString(" 0 R");
444 // Length of Annots end -> end of the dictionary.
445 sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
446 + rFirstPage.GetDictionaryLength()
447 - nAnnotsEndOffset;
448 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
449 + nAnnotsEndOffset,
450 nAnnotsAfterEndLength);
452 m_aEditBuffer.WriteOString(">>");
453 m_aEditBuffer.WriteOString("\nendobj\n\n");
456 return true;
459 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot)
461 if (m_pXRefStream)
462 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"_ostr));
463 else
465 if (!m_pTrailer)
467 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
468 return false;
470 pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"_ostr));
472 if (!pRoot)
474 SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
475 return false;
477 PDFObjectElement* pCatalog = pRoot->LookupObject();
478 if (!pCatalog)
480 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
481 return false;
483 sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
484 if (nCatalogId >= m_aXRef.size())
486 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
487 return false;
489 PDFElement* pAcroForm = pCatalog->Lookup("AcroForm"_ostr);
490 auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
491 if (pAcroFormReference)
493 // Write the updated AcroForm key of the Catalog object.
494 PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
495 if (!pAcroFormObject)
497 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
498 return false;
501 sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
502 m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
503 m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
504 m_aXRef[nAcroFormId].SetDirty(true);
505 m_aEditBuffer.WriteNumberAsString(nAcroFormId);
506 m_aEditBuffer.WriteOString(" 0 obj\n");
508 // If this is nullptr, then the AcroForm object is not in an object stream.
509 SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
511 if (!pAcroFormObject->Lookup("Fields"_ostr))
513 SAL_WARN("vcl.filter",
514 "PDFDocument::Sign: AcroForm object without required Fields key");
515 return false;
518 PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
519 if (!pAcroFormDictionary)
521 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
522 return false;
525 // Offset right before the end of the Fields array.
526 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields"_ostr)
527 + pAcroFormDictionary->GetKeyValueLength("Fields"_ostr)
528 - strlen("]");
530 // Length of beginning of the object dictionary -> Fields end.
531 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
532 if (pStreamBuffer)
533 m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
534 else
536 nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
537 m_aEditBuffer.WriteOString("<<");
538 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
539 + pAcroFormObject->GetDictionaryOffset(),
540 nFieldsBeforeEndLength);
543 // Append our reference at the end of the Fields array.
544 m_aEditBuffer.WriteOString(" ");
545 m_aEditBuffer.WriteNumberAsString(nAnnotId);
546 m_aEditBuffer.WriteOString(" 0 R");
548 // Length of Fields end -> end of the object dictionary.
549 if (pStreamBuffer)
551 sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
552 m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
553 + nFieldsEndOffset,
554 nFieldsAfterEndLength);
556 else
558 sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
559 + pAcroFormObject->GetDictionaryLength()
560 - nFieldsEndOffset;
561 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
562 + nFieldsEndOffset,
563 nFieldsAfterEndLength);
564 m_aEditBuffer.WriteOString(">>");
567 m_aEditBuffer.WriteOString("\nendobj\n\n");
569 else
571 // Write the updated Catalog object, references nAnnotId.
572 auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
573 m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
574 m_aXRef[nCatalogId].SetDirty(true);
575 m_aEditBuffer.WriteNumberAsString(nCatalogId);
576 m_aEditBuffer.WriteOString(" 0 obj\n");
577 m_aEditBuffer.WriteOString("<<");
578 if (!pAcroFormDictionary)
580 // No AcroForm key, assume no signatures.
581 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
582 + pCatalog->GetDictionaryOffset(),
583 pCatalog->GetDictionaryLength());
584 m_aEditBuffer.WriteOString("/AcroForm<</Fields[\n");
585 m_aEditBuffer.WriteNumberAsString(nAnnotId);
586 m_aEditBuffer.WriteOString(" 0 R\n]/SigFlags 3>>\n");
588 else
590 // AcroForm key is already there, insert our reference at the Fields end.
591 auto it = pAcroFormDictionary->GetItems().find("Fields"_ostr);
592 if (it == pAcroFormDictionary->GetItems().end())
594 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
595 return false;
598 auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
599 if (!pFields)
601 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
602 return false;
605 // Offset right before the end of the Fields array.
606 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields"_ostr)
607 + pAcroFormDictionary->GetKeyValueLength("Fields"_ostr)
608 - 1;
609 // Length of beginning of the Catalog dictionary -> Fields end.
610 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
611 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
612 + pCatalog->GetDictionaryOffset(),
613 nFieldsBeforeEndLength);
614 m_aEditBuffer.WriteOString(" ");
615 m_aEditBuffer.WriteNumberAsString(nAnnotId);
616 m_aEditBuffer.WriteOString(" 0 R");
617 // Length of Fields end -> end of the Catalog dictionary.
618 sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
619 + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
620 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
621 + nFieldsEndOffset,
622 nFieldsAfterEndLength);
624 m_aEditBuffer.WriteOString(">>\nendobj\n\n");
627 return true;
630 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
632 if (m_pXRefStream)
634 // Write the xref stream.
635 // This is a bit meta: the xref stream stores its own offset.
636 sal_Int32 nXRefStreamId = m_aXRef.size();
637 XRefEntry aXRefStreamEntry;
638 aXRefStreamEntry.SetOffset(nXRefOffset);
639 aXRefStreamEntry.SetDirty(true);
640 m_aXRef[nXRefStreamId] = aXRefStreamEntry;
642 // Write stream data.
643 SvMemoryStream aXRefStream;
644 const size_t nOffsetLen = 3;
645 // 3 additional bytes: predictor, the first and the third field.
646 const size_t nLineLength = nOffsetLen + 3;
647 // This is the line as it appears before tweaking according to the predictor.
648 std::vector<unsigned char> aOrigLine(nLineLength);
649 // This is the previous line.
650 std::vector<unsigned char> aPrevLine(nLineLength);
651 // This is the line as written to the stream.
652 std::vector<unsigned char> aFilteredLine(nLineLength);
653 for (const auto& rXRef : m_aXRef)
655 const XRefEntry& rEntry = rXRef.second;
657 if (!rEntry.GetDirty())
658 continue;
660 // Predictor.
661 size_t nPos = 0;
662 // PNG prediction: up (on all rows).
663 aOrigLine[nPos++] = 2;
665 // First field.
666 unsigned char nType = 0;
667 switch (rEntry.GetType())
669 case XRefEntryType::FREE:
670 nType = 0;
671 break;
672 case XRefEntryType::NOT_COMPRESSED:
673 nType = 1;
674 break;
675 case XRefEntryType::COMPRESSED:
676 nType = 2;
677 break;
679 aOrigLine[nPos++] = nType;
681 // Second field.
682 for (size_t i = 0; i < nOffsetLen; ++i)
684 size_t nByte = nOffsetLen - i - 1;
685 // Fields requiring more than one byte are stored with the
686 // high-order byte first.
687 unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
688 aOrigLine[nPos++] = nCh;
691 // Third field.
692 aOrigLine[nPos++] = 0;
694 // Now apply the predictor.
695 aFilteredLine[0] = aOrigLine[0];
696 for (size_t i = 1; i < nLineLength; ++i)
698 // Count the delta vs the previous line.
699 aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
700 // Remember the new reference.
701 aPrevLine[i] = aOrigLine[i];
704 aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
707 m_aEditBuffer.WriteNumberAsString(nXRefStreamId);
708 m_aEditBuffer.WriteOString(
709 " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
711 // ID.
712 auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"_ostr));
713 if (pID)
715 const std::vector<PDFElement*>& rElements = pID->GetElements();
716 m_aEditBuffer.WriteOString("/ID [ <");
717 for (size_t i = 0; i < rElements.size(); ++i)
719 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
720 if (!pIDString)
721 continue;
723 m_aEditBuffer.WriteOString(pIDString->GetValue());
724 if ((i + 1) < rElements.size())
725 m_aEditBuffer.WriteOString("> <");
727 m_aEditBuffer.WriteOString("> ] ");
730 // Index.
731 m_aEditBuffer.WriteOString("/Index [ ");
732 for (const auto& rXRef : m_aXRef)
734 if (!rXRef.second.GetDirty())
735 continue;
737 m_aEditBuffer.WriteNumberAsString(rXRef.first);
738 m_aEditBuffer.WriteOString(" 1 ");
740 m_aEditBuffer.WriteOString("] ");
742 // Info.
743 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"_ostr));
744 if (pInfo)
746 m_aEditBuffer.WriteOString("/Info ");
747 m_aEditBuffer.WriteNumberAsString(pInfo->GetObjectValue());
748 m_aEditBuffer.WriteOString(" ");
749 m_aEditBuffer.WriteNumberAsString(pInfo->GetGenerationValue());
750 m_aEditBuffer.WriteOString(" R ");
753 // Length.
754 m_aEditBuffer.WriteOString("/Length ");
756 ZCodec aZCodec;
757 aZCodec.BeginCompression();
758 aXRefStream.Seek(0);
759 SvMemoryStream aStream;
760 aZCodec.Compress(aXRefStream, aStream);
761 aZCodec.EndCompression();
762 aXRefStream.Seek(0);
763 aXRefStream.SetStreamSize(0);
764 aStream.Seek(0);
765 aXRefStream.WriteStream(aStream);
767 m_aEditBuffer.WriteNumberAsString(aXRefStream.GetSize());
769 if (!m_aStartXRefs.empty())
771 // Write location of the previous cross-reference section.
772 m_aEditBuffer.WriteOString("/Prev ");
773 m_aEditBuffer.WriteNumberAsString(m_aStartXRefs.back());
776 // Root.
777 m_aEditBuffer.WriteOString("/Root ");
778 m_aEditBuffer.WriteNumberAsString(pRoot->GetObjectValue());
779 m_aEditBuffer.WriteOString(" ");
780 m_aEditBuffer.WriteNumberAsString(pRoot->GetGenerationValue());
781 m_aEditBuffer.WriteOString(" R ");
783 // Size.
784 m_aEditBuffer.WriteOString("/Size ");
785 m_aEditBuffer.WriteNumberAsString(m_aXRef.size());
787 m_aEditBuffer.WriteOString("/Type/XRef/W[1 3 1]>>\nstream\n");
788 aXRefStream.Seek(0);
789 m_aEditBuffer.WriteStream(aXRefStream);
790 m_aEditBuffer.WriteOString("\nendstream\nendobj\n\n");
792 else
794 // Write the xref table.
795 m_aEditBuffer.WriteOString("xref\n");
796 for (const auto& rXRef : m_aXRef)
798 size_t nObject = rXRef.first;
799 size_t nOffset = rXRef.second.GetOffset();
800 if (!rXRef.second.GetDirty())
801 continue;
803 m_aEditBuffer.WriteNumberAsString(nObject);
804 m_aEditBuffer.WriteOString(" 1\n");
805 OStringBuffer aBuffer = OString::number(static_cast<sal_Int32>(nOffset));
806 while (aBuffer.getLength() < 10)
807 aBuffer.insert(0, "0");
808 if (nObject == 0)
809 aBuffer.append(" 65535 f \n");
810 else
811 aBuffer.append(" 00000 n \n");
812 m_aEditBuffer.WriteOString(aBuffer);
815 // Write the trailer.
816 m_aEditBuffer.WriteOString("trailer\n<</Size ");
817 m_aEditBuffer.WriteNumberAsString(m_aXRef.size());
818 m_aEditBuffer.WriteOString("/Root ");
819 m_aEditBuffer.WriteNumberAsString(pRoot->GetObjectValue());
820 m_aEditBuffer.WriteOString(" ");
821 m_aEditBuffer.WriteNumberAsString(pRoot->GetGenerationValue());
822 m_aEditBuffer.WriteOString(" R\n");
823 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"_ostr));
824 if (pInfo)
826 m_aEditBuffer.WriteOString("/Info ");
827 m_aEditBuffer.WriteNumberAsString(pInfo->GetObjectValue());
828 m_aEditBuffer.WriteOString(" ");
829 m_aEditBuffer.WriteNumberAsString(pInfo->GetGenerationValue());
830 m_aEditBuffer.WriteOString(" R\n");
832 auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"_ostr));
833 if (pID)
835 const std::vector<PDFElement*>& rElements = pID->GetElements();
836 m_aEditBuffer.WriteOString("/ID [ <");
837 for (size_t i = 0; i < rElements.size(); ++i)
839 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
840 if (!pIDString)
841 continue;
843 m_aEditBuffer.WriteOString(pIDString->GetValue());
844 if ((i + 1) < rElements.size())
845 m_aEditBuffer.WriteOString(">\n<");
847 m_aEditBuffer.WriteOString("> ]\n");
850 if (!m_aStartXRefs.empty())
852 // Write location of the previous cross-reference section.
853 m_aEditBuffer.WriteOString("/Prev ");
854 m_aEditBuffer.WriteNumberAsString(m_aStartXRefs.back());
857 m_aEditBuffer.WriteOString(">>\n");
861 bool PDFDocument::Sign(svl::crypto::SigningContext& rSigningContext, const OUString& rDescription,
862 bool bAdES)
864 m_aEditBuffer.Seek(STREAM_SEEK_TO_END);
865 m_aEditBuffer.WriteOString("\n");
867 sal_uInt64 nSignatureLastByteRangeOffset = 0;
868 sal_Int64 nSignatureContentOffset = 0;
869 sal_Int32 nSignatureId
870 = WriteSignatureObject(rSigningContext, rDescription, bAdES, nSignatureLastByteRangeOffset,
871 nSignatureContentOffset);
872 assert(nSignatureContentOffset > 0
873 && "WriteSignatureObject guarantees a length for nSignatureContentOffset");
874 tools::Rectangle aSignatureRectangle;
875 sal_Int32 nAppearanceId = WriteAppearanceObject(aSignatureRectangle);
877 std::vector<PDFObjectElement*> aPages = GetPages();
878 if (aPages.empty())
880 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
881 return false;
884 size_t nPage = 0;
885 if (m_nSignaturePage < aPages.size())
887 nPage = m_nSignaturePage;
889 if (!aPages[nPage])
891 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to find page #" << nPage);
892 return false;
895 PDFObjectElement& rPage = *aPages[nPage];
896 sal_Int32 nAnnotId = WriteAnnotObject(rPage, nSignatureId, nAppearanceId, aSignatureRectangle);
898 if (!WritePageObject(rPage, nAnnotId))
900 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
901 return false;
904 PDFReferenceElement* pRoot = nullptr;
905 if (!WriteCatalogObject(nAnnotId, pRoot))
907 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
908 return false;
911 sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
912 WriteXRef(nXRefOffset, pRoot);
914 // Write startxref.
915 m_aEditBuffer.WriteOString("startxref\n");
916 m_aEditBuffer.WriteNumberAsString(nXRefOffset);
917 m_aEditBuffer.WriteOString("\n%%EOF\n");
919 // Finalize the signature, now that we know the total file size.
920 // Calculate the length of the last byte range.
921 sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
922 sal_Int64 nLastByteRangeLength
923 = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
924 // Write the length to the buffer.
925 m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
926 OString aByteRangeBuffer = OString::number(nLastByteRangeLength) + " ]";
927 m_aEditBuffer.WriteOString(aByteRangeBuffer);
929 // Create the PKCS#7 object.
930 if (rSigningContext.m_xCertificate)
932 css::uno::Sequence<sal_Int8> aDerEncoded = rSigningContext.m_xCertificate->getEncoded();
933 if (!aDerEncoded.hasElements())
935 SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
936 return false;
940 m_aEditBuffer.Seek(0);
941 sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
942 std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
943 m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
945 m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
946 sal_uInt64 nBufferSize2 = nLastByteRangeLength;
947 std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
948 m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
950 OStringBuffer aCMSHexBuffer;
951 if (rSigningContext.m_aSignatureValue.empty())
953 svl::crypto::Signing aSigning(rSigningContext);
954 aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
955 aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
956 if (!aSigning.Sign(aCMSHexBuffer))
958 if (rSigningContext.m_xCertificate.is())
960 SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
962 return false;
965 else
967 // The signature value provided by the context: use that instead of building a new
968 // signature.
969 for (unsigned char ch : rSigningContext.m_aSignatureValue)
971 svl::crypto::Signing::appendHex(ch, aCMSHexBuffer);
975 assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
977 m_aEditBuffer.Seek(nSignatureContentOffset);
978 m_aEditBuffer.WriteOString(aCMSHexBuffer);
980 return true;
983 bool PDFDocument::Write(SvStream& rStream)
985 m_aEditBuffer.Seek(0);
986 rStream.WriteStream(m_aEditBuffer);
987 return rStream.good();
// Reads rStream one character at a time and appends the parsed tokens to
// rElements. The first character of each token selects the element type;
// the stream is then rewound so that the element's own Read() sees the
// complete token.
//
// eMode selects an early stop: with EOF_TOKEN the function returns true after
// a comment whose end position equals the last entry of m_aEOFs; with
// END_OF_OBJECT it returns true right after an "endobj" keyword. Otherwise it
// runs until the stream reports eof.
//
// pObjectElement is the initial "last seen object" that arrays, names,
// numbers, references and streams get attached to.
//
// Returns false as soon as any element fails to parse or an unexpected
// keyword / character is met, true otherwise.
990 bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
991 std::vector<std::unique_ptr<PDFElement>>& rElements,
992 PDFObjectElement* pObjectElement)
994 // Last seen object token.
995 PDFObjectElement* pObject = pObjectElement;
996 PDFNameElement* pObjectKey = nullptr;
997 PDFObjectElement* pObjectStream = nullptr;
998 bool bInXRef = false;
999 // The next number will be an xref offset.
1000 bool bInStartXRef = false;
1001 // Dictionary depth, so we know when we're outside any dictionaries.
// NOTE(review): the same counter is also incremented for '[' and decremented
// for ']' below, and nothing stops it from going negative on unbalanced input.
1002 int nDepth = 0;
1003 // Last seen array token that's outside any dictionaries.
1004 PDFArrayElement* pArray = nullptr;
1005 // If we're inside an obj/endobj pair.
1006 bool bInObject = false;
1008 while (true)
1010 char ch;
1011 rStream.ReadChar(ch);
1012 if (rStream.eof())
1013 break;
1015 switch (ch)
// Comment, which also covers the %%EOF marker.
1017 case '%':
1019 auto pComment = new PDFCommentElement(*this);
1020 rElements.push_back(std::unique_ptr<PDFElement>(pComment));
1021 rStream.SeekRel(-1);
1022 if (!rElements.back()->Read(rStream))
1024 SAL_WARN("vcl.filter",
1025 "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
1026 return false;
1028 if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
1029 && m_aEOFs.back() == rStream.Tell())
1031 // Found EOF and partial parsing requested, we're done.
1032 return true;
1034 break;
1036 case '<':
1038 // Dictionary or hex string.
// Peek at the following character, then rewind over both characters so the
// element's Read() starts at the opening '<'.
1039 rStream.ReadChar(ch);
1040 rStream.SeekRel(-2);
1041 if (ch == '<')
1043 rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
1044 ++nDepth;
1046 else
1047 rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
// NOTE(review): the warning below names PDFDictionaryElement even when the
// element that failed to read is a PDFHexStringElement.
1048 if (!rElements.back()->Read(rStream))
1050 SAL_WARN("vcl.filter",
1051 "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
1052 return false;
1054 break;
// End of a dictionary.
1056 case '>':
1058 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
1059 --nDepth;
1060 rStream.SeekRel(-1);
1061 if (!rElements.back()->Read(rStream))
1063 SAL_WARN("vcl.filter",
1064 "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
1065 return false;
1067 break;
// Start of an array.
1069 case '[':
1071 auto pArr = new PDFArrayElement(pObject);
1072 rElements.push_back(std::unique_ptr<PDFElement>(pArr));
1073 if (nDepth == 0)
1075 // The array is attached directly, inform the object.
1076 pArray = pArr;
1077 if (pObject)
1079 pObject->SetArray(pArray);
// The offset is taken after '[' has been consumed.
1080 pObject->SetArrayOffset(rStream.Tell());
1083 ++nDepth;
1084 rStream.SeekRel(-1);
1085 if (!rElements.back()->Read(rStream))
1087 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
1088 return false;
1090 break;
// End of an array.
1092 case ']':
1094 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
1095 --nDepth;
1096 rStream.SeekRel(-1);
1097 if (nDepth == 0)
1099 if (pObject)
// Length is measured from the offset recorded at '[' up to, but not
// including, the closing ']'.
1101 pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
1104 if (!rElements.back()->Read(rStream))
1106 SAL_WARN("vcl.filter",
1107 "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1108 return false;
1110 break;
// Name, e.g. a dictionary key or a name value.
1112 case '/':
1114 auto pNameElement = new PDFNameElement();
1115 rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
1116 rStream.SeekRel(-1);
1117 if (!pNameElement->Read(rStream))
1119 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1120 return false;
// A "Type" name directly followed by "ObjStm" marks the current object as an
// object stream; any other name becomes the most recently seen name.
1123 if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
1124 && pNameElement->GetValue() == "ObjStm")
1125 pObjectStream = pObject;
1126 else
1127 pObjectKey = pNameElement;
1129 if (bInObject && !nDepth && pObject)
1131 // Name element inside an object, but outside a
1132 // dictionary / array: remember it.
1133 pObject->SetNameElement(pNameElement);
1136 break;
// Literal string.
1138 case '(':
1140 rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1141 rStream.SeekRel(-1);
1142 if (!rElements.back()->Read(rStream))
1144 SAL_WARN("vcl.filter",
1145 "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1146 return false;
1148 break;
// Everything else: numbers, keywords, whitespace.
1150 default:
1152 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-' || ch == '+'
1153 || ch == '.')
1155 // Numbering object: an integer or a real.
1156 auto pNumberElement = new PDFNumberElement();
1157 rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1158 rStream.SeekRel(-1);
1159 if (!pNumberElement->Read(rStream))
1161 SAL_WARN("vcl.filter",
1162 "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1163 return false;
// First number after "startxref": record it and, if an object starts at that
// offset, remember that object as the xref stream.
1165 if (bInStartXRef)
1167 bInStartXRef = false;
1168 m_aStartXRefs.push_back(pNumberElement->GetValue());
1170 auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1171 if (it != m_aOffsetObjects.end())
1172 m_pXRefStream = it->second;
1174 else if (bInObject && !nDepth && pObject)
1175 // Number element inside an object, but outside a
1176 // dictionary / array: remember it.
1177 pObject->SetNumberElement(pNumberElement);
1179 else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1181 // Possible keyword, like "obj".
1182 rStream.SeekRel(-1);
1183 OString aKeyword = ReadKeyword(rStream);
1185 bool bObj = aKeyword == "obj";
1186 if (bObj || aKeyword == "R")
// Both "N G obj" and "N G R" need the two preceding tokens to be numbers.
1188 size_t nElements = rElements.size();
1189 if (nElements < 2)
1191 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1192 "tokens before 'obj' or 'R' keyword");
1193 return false;
1196 auto pObjectNumber
1197 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1198 auto pGenerationNumber
1199 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1200 if (!pObjectNumber || !pGenerationNumber)
1202 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1203 "generation number before 'obj' or 'R' keyword");
1204 return false;
1207 if (bObj)
// New object: register it by the stream offset of its object number and by
// its ID.
1209 pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
1210 pGenerationNumber->GetValue());
1211 rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1212 m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1213 m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1214 bInObject = true;
1216 else
1218 auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
1219 *pGenerationNumber);
1220 rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1221 if (bInObject && nDepth > 0 && pObject)
1222 // Inform the object about a new in-dictionary reference.
1223 pObject->AddDictionaryReference(pReference);
1225 if (!rElements.back()->Read(rStream))
1227 SAL_WARN("vcl.filter",
1228 "PDFDocument::Tokenize: PDFElement::Read() failed");
1229 return false;
1232 else if (aKeyword == "stream")
1234 // Look up the length of the stream from the parent object's dictionary.
// NOTE(review): if rElements holds no PDFObjectElement at all, nLength stays 0
// and parsing continues without a warning.
1235 size_t nLength = 0;
1236 for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1238 // Iterate in reverse order.
1239 size_t nIndex = rElements.size() - nElement - 1;
1240 PDFElement* pElement = rElements[nIndex].get();
1241 auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1242 if (!pObj)
1243 continue;
1245 PDFElement* pLookup = pObj->Lookup("Length"_ostr);
1246 auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1247 if (pReference)
1249 // Length is provided as a reference.
1250 nLength = pReference->LookupNumber(rStream);
1251 break;
1254 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1255 if (pNumber)
1257 // Length is provided directly.
1258 nLength = pNumber->GetValue();
1259 break;
1262 SAL_WARN(
1263 "vcl.filter",
1264 "PDFDocument::Tokenize: found no Length key for stream keyword");
1265 return false;
1268 PDFDocument::SkipLineBreaks(rStream);
1269 auto pStreamElement = new PDFStreamElement(nLength);
1270 if (pObject)
1271 pObject->SetStream(pStreamElement);
1272 rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1273 if (!rElements.back()->Read(rStream))
1275 SAL_WARN("vcl.filter",
1276 "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1277 return false;
1280 else if (aKeyword == "endstream")
1282 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1283 if (!rElements.back()->Read(rStream))
1285 SAL_WARN("vcl.filter",
1286 "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1287 return false;
1290 else if (aKeyword == "endobj")
1292 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1293 if (!rElements.back()->Read(rStream))
1295 SAL_WARN("vcl.filter",
1296 "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1297 return false;
1299 if (eMode == TokenizeMode::END_OF_OBJECT)
1301 // Found endobj and only object parsing was requested, we're done.
1302 return true;
1305 if (pObjectStream)
1307 // We're at the end of an object stream, parse the stored objects.
1308 pObjectStream->ParseStoredObjects();
1309 pObjectStream = nullptr;
1310 pObjectKey = nullptr;
1312 bInObject = false;
1314 else if (aKeyword == "true" || aKeyword == "false")
1315 rElements.push_back(std::unique_ptr<PDFElement>(
1316 new PDFBooleanElement(aKeyword.toBoolean())));
1317 else if (aKeyword == "null")
1318 rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1319 else if (aKeyword == "xref")
1320 // Allow 'f' and 'n' keywords.
1321 bInXRef = true;
// 'f' / 'n' markers of xref entries are accepted and produce no element.
1322 else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1325 else if (aKeyword == "trailer")
1327 auto pTrailer = new PDFTrailerElement(*this);
1329 // Make it possible to find this trailer later by offset.
// NOTE(review): the result of pTrailer->Read() is not checked here.
1330 pTrailer->Read(rStream);
1331 m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1333 // When reading till the first EOF token only, remember
1334 // just the first trailer token.
1335 if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
1336 m_pTrailer = pTrailer;
1337 rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1339 else if (aKeyword == "startxref")
1341 bInStartXRef = true;
1343 else
1345 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1346 << aKeyword << "' keyword at byte position "
1347 << rStream.Tell());
1348 return false;
1351 else
1353 auto uChar = static_cast<unsigned char>(ch);
1354 // Be more lenient and allow unexpected null char
1355 if (!rtl::isAsciiWhiteSpace(uChar) && uChar != 0)
1357 SAL_WARN("vcl.filter",
1358 "PDFDocument::Tokenize: unexpected character with code "
1359 << sal_Int32(ch) << " at byte position " << rStream.Tell());
1360 return false;
1362 SAL_WARN_IF(uChar == 0, "vcl.filter",
1363 "PDFDocument::Tokenize: unexpected null character at "
1364 << rStream.Tell() << " - ignoring");
1366 break;
// Reached the end of the stream without a parse error.
1371 return true;
1374 void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
1376 m_aIDObjects[nID] = pObject;
1379 bool PDFDocument::ReadWithPossibleFixup(SvStream& rStream)
1381 if (Read(rStream))
1382 return true;
1384 // Read failed, try a roundtrip through pdfium and then retry.
1385 rStream.Seek(0);
1386 SvMemoryStream aStandardizedStream;
1387 vcl::pdf::convertToHighestSupported(rStream, aStandardizedStream);
1388 return Read(aStandardizedStream);
1391 bool PDFDocument::Read(SvStream& rStream)
1393 // Check file magic.
1394 std::vector<sal_Int8> aHeader(5);
1395 rStream.Seek(0);
1396 rStream.ReadBytes(aHeader.data(), aHeader.size());
1397 if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1398 || aHeader[4] != '-')
1400 SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1401 return false;
1404 // Allow later editing of the contents in-memory.
1405 rStream.Seek(0);
1406 m_aEditBuffer.WriteStream(rStream);
1408 // clear out key items that may have been filled with info from any previous read attempt
1409 m_aOffsetTrailers.clear();
1410 m_aTrailerOffsets.clear();
1411 m_pTrailer = nullptr;
1412 m_pXRefStream = nullptr;
1414 // Look up the offset of the xref table.
1415 size_t nStartXRef = FindStartXRef(rStream);
1416 SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1417 if (nStartXRef == 0)
1419 SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1420 return false;
1422 while (true)
1424 rStream.Seek(nStartXRef);
1425 OString aKeyword = ReadKeyword(rStream);
1426 if (aKeyword.isEmpty())
1427 ReadXRefStream(rStream);
1429 else
1431 if (aKeyword != "xref")
1433 SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1434 return false;
1436 ReadXRef(rStream);
1437 if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1439 SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1440 return false;
1444 PDFNumberElement* pPrev = nullptr;
1445 if (m_pTrailer)
1447 pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"_ostr));
1449 // Remember the offset of this trailer in the correct order. It's
1450 // possible that newer trailers don't have a larger offset.
1451 m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
1453 else if (m_pXRefStream)
1454 pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"_ostr));
1455 if (pPrev)
1456 nStartXRef = pPrev->GetValue();
1458 // Reset state, except the edit buffer.
1459 m_aOffsetTrailers.clear(); // contents are lifecycle managed by m_aElements
1460 m_aElements.clear();
1461 m_aOffsetObjects.clear();
1462 m_aIDObjects.clear();
1463 m_aStartXRefs.clear();
1464 m_aEOFs.clear();
1465 m_pTrailer = nullptr;
1466 m_pXRefStream = nullptr;
1467 if (!pPrev)
1468 break;
1471 // Then we can tokenize the stream.
1472 rStream.Seek(0);
1473 return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1476 OString PDFDocument::ReadKeyword(SvStream& rStream)
1478 OStringBuffer aBuf;
1479 char ch;
1480 rStream.ReadChar(ch);
1481 if (rStream.eof())
1482 return {};
1483 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1485 aBuf.append(ch);
1486 rStream.ReadChar(ch);
1487 if (rStream.eof())
1488 return aBuf.toString();
1490 rStream.SeekRel(-1);
1491 return aBuf.toString();
1494 size_t PDFDocument::FindStartXRef(SvStream& rStream)
1496 // Find the "startxref" token, somewhere near the end of the document.
1497 std::vector<char> aBuf(1024);
1498 rStream.Seek(STREAM_SEEK_TO_END);
1499 if (rStream.Tell() > aBuf.size())
1500 rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1501 else
1502 // The document is really short, then just read it from the start.
1503 rStream.Seek(0);
1504 size_t nBeforePeek = rStream.Tell();
1505 size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1506 rStream.Seek(nBeforePeek);
1507 if (nSize != aBuf.size())
1508 aBuf.resize(nSize);
1509 OString aPrefix("startxref"_ostr);
1510 // Find the last startxref at the end of the document.
1511 auto itLastValid = aBuf.end();
1512 auto it = aBuf.begin();
1513 while (true)
1515 it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1516 if (it == aBuf.end())
1517 break;
1519 itLastValid = it;
1520 ++it;
1522 if (itLastValid == aBuf.end())
1524 SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1525 return 0;
1528 rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1529 if (rStream.eof())
1531 SAL_WARN("vcl.filter",
1532 "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1533 return 0;
1536 PDFDocument::SkipWhitespace(rStream);
1537 PDFNumberElement aNumber;
1538 if (!aNumber.Read(rStream))
1539 return 0;
1540 return aNumber.GetValue();
1543 void PDFDocument::ReadXRefStream(SvStream& rStream)
1545 // Look up the stream length in the object dictionary.
1546 if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1548 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1549 return;
1552 if (m_aElements.empty())
1554 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1555 return;
1558 PDFObjectElement* pObject = nullptr;
1559 for (const auto& pElement : m_aElements)
1561 if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1563 pObject = pObj;
1564 break;
1567 if (!pObject)
1569 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1570 return;
1573 // So that the Prev key can be looked up later.
1574 m_pXRefStream = pObject;
1576 PDFElement* pLookup = pObject->Lookup("Length"_ostr);
1577 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1578 if (!pNumber)
1580 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1581 return;
1583 sal_uInt64 nLength = pNumber->GetValue();
1585 // Look up the stream offset.
1586 PDFStreamElement* pStream = nullptr;
1587 for (const auto& pElement : m_aElements)
1589 if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1591 pStream = pS;
1592 break;
1595 if (!pStream)
1597 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1598 return;
1601 // Read and decompress it.
1602 rStream.Seek(pStream->GetOffset());
1603 std::vector<char> aBuf(nLength);
1604 rStream.ReadBytes(aBuf.data(), aBuf.size());
1606 auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"_ostr));
1607 if (!pFilter)
1609 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1610 return;
1613 if (pFilter->GetValue() != "FlateDecode")
1615 SAL_WARN("vcl.filter",
1616 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1617 return;
1620 int nColumns = 1;
1621 int nPredictor = 1;
1622 if (auto pDecodeParams
1623 = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms"_ostr)))
1625 const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1626 auto it = rItems.find("Columns"_ostr);
1627 if (it != rItems.end())
1628 if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1629 nColumns = pColumns->GetValue();
1630 it = rItems.find("Predictor"_ostr);
1631 if (it != rItems.end())
1632 if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1633 nPredictor = pPredictor->GetValue();
1636 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1637 SvMemoryStream aStream;
1638 ZCodec aZCodec;
1639 aZCodec.BeginCompression();
1640 aZCodec.Decompress(aSource, aStream);
1641 if (!aZCodec.EndCompression())
1643 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1644 return;
1647 // Look up the first and the last entry we need to read.
1648 auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"_ostr));
1649 std::vector<size_t> aFirstObjects;
1650 std::vector<size_t> aNumberOfObjects;
1651 if (!pIndex)
1653 auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"_ostr));
1654 if (pSize)
1656 aFirstObjects.push_back(0);
1657 aNumberOfObjects.push_back(pSize->GetValue());
1659 else
1661 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1662 return;
1665 else
1667 const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1668 size_t nFirstObject = 0;
1669 for (size_t i = 0; i < rIndexElements.size(); ++i)
1671 if (i % 2 == 0)
1673 auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1674 if (!pFirstObject)
1676 SAL_WARN("vcl.filter",
1677 "PDFDocument::ReadXRefStream: Index has no first object");
1678 return;
1680 nFirstObject = pFirstObject->GetValue();
1681 continue;
1684 auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1685 if (!pNumberOfObjects)
1687 SAL_WARN("vcl.filter",
1688 "PDFDocument::ReadXRefStream: Index has no number of objects");
1689 return;
1691 aFirstObjects.push_back(nFirstObject);
1692 aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1696 // Look up the format of a single entry.
1697 const int nWSize = 3;
1698 auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"_ostr));
1699 if (!pW || pW->GetElements().size() < nWSize)
1701 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1702 return;
1704 int aW[nWSize];
1705 // First character is the (kind of) repeated predictor.
1706 int nLineLength = 1;
1707 for (size_t i = 0; i < nWSize; ++i)
1709 auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1710 if (!pI)
1712 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1713 return;
1715 aW[i] = pI->GetValue();
1716 nLineLength += aW[i];
1719 if (nPredictor > 1 && nLineLength - 1 != nColumns)
1721 SAL_WARN("vcl.filter",
1722 "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1723 return;
1726 aStream.Seek(0);
1727 for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1729 size_t nFirstObject = aFirstObjects[nSubSection];
1730 size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1732 // This is the line as read from the stream.
1733 std::vector<unsigned char> aOrigLine(nLineLength);
1734 // This is the line as it appears after tweaking according to nPredictor.
1735 std::vector<unsigned char> aFilteredLine(nLineLength);
1736 for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1738 size_t nIndex = nFirstObject + nEntry;
1740 aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
1741 if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1743 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1744 "inconsistent with /DecodeParms/Predictor for object #"
1745 << nIndex);
1746 return;
1749 for (int i = 0; i < nLineLength; ++i)
1751 switch (nPredictor)
1753 case 1:
1754 // No prediction.
1755 break;
1756 case 12:
1757 // PNG prediction: up (on all rows).
1758 aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1759 break;
1760 default:
1761 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1762 << nPredictor);
1763 return;
1767 // First character is already handled above.
1768 int nPos = 1;
1769 size_t nType = 0;
1770 // Start of the current field in the stream data.
1771 int nOffset = nPos;
1772 for (; nPos < nOffset + aW[0]; ++nPos)
1774 unsigned char nCh = aFilteredLine[nPos];
1775 nType = (nType << 8) + nCh;
1778 // Start of the object in the file stream.
1779 size_t nStreamOffset = 0;
1780 nOffset = nPos;
1781 for (; nPos < nOffset + aW[1]; ++nPos)
1783 unsigned char nCh = aFilteredLine[nPos];
1784 nStreamOffset = (nStreamOffset << 8) + nCh;
1787 // Generation number of the object.
1788 size_t nGenerationNumber = 0;
1789 nOffset = nPos;
1790 for (; nPos < nOffset + aW[2]; ++nPos)
1792 unsigned char nCh = aFilteredLine[nPos];
1793 nGenerationNumber = (nGenerationNumber << 8) + nCh;
1796 // Ignore invalid nType.
1797 if (nType <= 2)
1799 if (m_aXRef.find(nIndex) == m_aXRef.end())
1801 XRefEntry aEntry;
1802 switch (nType)
1804 case 0:
1805 aEntry.SetType(XRefEntryType::FREE);
1806 break;
1807 case 1:
1808 aEntry.SetType(XRefEntryType::NOT_COMPRESSED);
1809 break;
1810 case 2:
1811 aEntry.SetType(XRefEntryType::COMPRESSED);
1812 break;
1814 aEntry.SetOffset(nStreamOffset);
1815 m_aXRef[nIndex] = aEntry;
1822 void PDFDocument::ReadXRef(SvStream& rStream)
1824 PDFDocument::SkipWhitespace(rStream);
1826 while (true)
1828 PDFNumberElement aFirstObject;
1829 if (!aFirstObject.Read(rStream))
1831 // Next token is not a number, it'll be the trailer.
1832 return;
1835 if (aFirstObject.GetValue() < 0)
1837 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1838 return;
1841 PDFDocument::SkipWhitespace(rStream);
1842 PDFNumberElement aNumberOfEntries;
1843 if (!aNumberOfEntries.Read(rStream))
1845 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1846 return;
1849 if (aNumberOfEntries.GetValue() < 0)
1851 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1852 return;
1855 size_t nSize = aNumberOfEntries.GetValue();
1856 for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1858 size_t nIndex = aFirstObject.GetValue() + nEntry;
1859 PDFDocument::SkipWhitespace(rStream);
1860 PDFNumberElement aOffset;
1861 if (!aOffset.Read(rStream))
1863 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1864 return;
1867 PDFDocument::SkipWhitespace(rStream);
1868 PDFNumberElement aGenerationNumber;
1869 if (!aGenerationNumber.Read(rStream))
1871 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1872 return;
1875 PDFDocument::SkipWhitespace(rStream);
1876 OString aKeyword = ReadKeyword(rStream);
1877 if (aKeyword != "f" && aKeyword != "n")
1879 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1880 return;
1882 // xrefs are read in reverse order, so never update an existing
1883 // offset with an older one.
1884 if (m_aXRef.find(nIndex) == m_aXRef.end())
1886 XRefEntry aEntry;
1887 aEntry.SetOffset(aOffset.GetValue());
1888 // Initially only the first entry is dirty.
1889 if (nIndex == 0)
1890 aEntry.SetDirty(true);
1891 m_aXRef[nIndex] = aEntry;
1893 PDFDocument::SkipWhitespace(rStream);
1898 void PDFDocument::SkipWhitespace(SvStream& rStream)
1900 char ch = 0;
1902 while (true)
1904 rStream.ReadChar(ch);
1905 if (rStream.eof())
1906 break;
1908 if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1910 rStream.SeekRel(-1);
1911 return;
1916 void PDFDocument::SkipLineBreaks(SvStream& rStream)
1918 char ch = 0;
1920 while (true)
1922 rStream.ReadChar(ch);
1923 if (rStream.eof())
1924 break;
1926 if (ch != '\n' && ch != '\r')
1928 rStream.SeekRel(-1);
1929 return;
1934 size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1936 auto it = m_aXRef.find(nIndex);
1937 if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1939 SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1940 << nIndex << ", but failed");
1941 return 0;
1944 return it->second.GetOffset();
1947 const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1949 return m_aElements;
1952 /// Visits the page tree recursively, looking for page objects.
1953 static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1955 auto pKidsRef = pPages->Lookup("Kids"_ostr);
1956 auto pKids = dynamic_cast<PDFArrayElement*>(pKidsRef);
1957 if (!pKids)
1959 auto pRefKids = dynamic_cast<PDFReferenceElement*>(pKidsRef);
1960 if (!pRefKids)
1962 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1963 return;
1965 auto pObjWithKids = pRefKids->LookupObject();
1966 if (!pObjWithKids)
1968 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1969 return;
1972 pKids = pObjWithKids->GetArray();
1975 if (!pKids)
1977 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1978 return;
1981 pPages->setVisiting(true);
1983 for (const auto& pKid : pKids->GetElements())
1985 auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1986 if (!pReference)
1987 continue;
1989 PDFObjectElement* pKidObject = pReference->LookupObject();
1990 if (!pKidObject)
1991 continue;
1993 // detect if visiting reenters itself
1994 if (pKidObject->alreadyVisiting())
1996 SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1997 continue;
2000 auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"_ostr));
2001 if (pName && pName->GetValue() == "Pages")
2002 // Pages inside pages: recurse.
2003 visitPages(pKidObject, rRet);
2004 else
2005 // Found an actual page.
2006 rRet.push_back(pKidObject);
2009 pPages->setVisiting(false);
2012 PDFObjectElement* PDFDocument::GetCatalog()
2014 PDFReferenceElement* pRoot = nullptr;
2016 PDFTrailerElement* pTrailer = nullptr;
2017 if (!m_aTrailerOffsets.empty())
2019 // Get access to the latest trailer, and work with the keys of that
2020 // one.
2021 auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
2022 if (it != m_aOffsetTrailers.end())
2023 pTrailer = it->second;
2026 if (pTrailer)
2027 pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"_ostr));
2028 else if (m_pXRefStream)
2029 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"_ostr));
2031 if (!pRoot)
2033 SAL_WARN("vcl.filter", "PDFDocument::GetCatalog: trailer has no Root key");
2034 return nullptr;
2037 return pRoot->LookupObject();
2040 std::vector<PDFObjectElement*> PDFDocument::GetPages()
2042 std::vector<PDFObjectElement*> aRet;
2044 PDFObjectElement* pCatalog = GetCatalog();
2045 if (!pCatalog)
2047 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
2048 return aRet;
2051 PDFObjectElement* pPages = pCatalog->LookupObject("Pages"_ostr);
2052 if (!pPages)
2054 SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
2055 << ") has no pages");
2056 return aRet;
2059 visitPages(pPages, aRet);
2061 return aRet;
2064 void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
2066 std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
2068 std::vector<PDFObjectElement*> aRet;
2070 std::vector<PDFObjectElement*> aPages = GetPages();
2072 for (const auto& pPage : aPages)
2074 if (!pPage)
2075 continue;
2077 PDFElement* pAnnotsElement = pPage->Lookup("Annots"_ostr);
2078 auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
2079 if (!pAnnots)
2081 // Annots is not an array, see if it's a reference to an object
2082 // with a direct array.
2083 auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
2084 if (pAnnotsRef)
2086 if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
2088 pAnnots = pAnnotsObject->GetArray();
2093 if (!pAnnots)
2094 continue;
2096 for (const auto& pAnnot : pAnnots->GetElements())
2098 auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
2099 if (!pReference)
2100 continue;
2102 PDFObjectElement* pAnnotObject = pReference->LookupObject();
2103 if (!pAnnotObject)
2104 continue;
2106 auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"_ostr));
2107 if (!pFT || pFT->GetValue() != "Sig")
2108 continue;
2110 aRet.push_back(pAnnotObject);
2114 return aRet;
2117 std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
2119 return svl::crypto::DecodeHexString(pElement->GetValue());
2122 OUString PDFDocument::DecodeHexStringUTF16BE(PDFHexStringElement const& rElement)
2124 std::vector<unsigned char> const encoded(DecodeHexString(&rElement));
2125 // Text strings can be PDF-DocEncoding or UTF-16BE with mandatory BOM;
2126 // only the latter supported is here
2127 if (encoded.size() < 2 || encoded[0] != 0xFE || encoded[1] != 0xFF || (encoded.size() & 1) != 0)
2129 return {};
2131 OUStringBuffer buf(encoded.size() - 2);
2132 for (size_t i = 2; i < encoded.size(); i += 2)
2134 buf.append(sal_Unicode((static_cast<sal_uInt16>(encoded[i]) << 8) | encoded[i + 1]));
2136 return buf.makeStringAndClear();
2139 PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)
2140 : m_rDoc(rDoc)
2144 bool PDFCommentElement::Read(SvStream& rStream)
2146 // Read from (including) the % char till (excluding) the end of the line/stream.
2147 OStringBuffer aBuf;
2148 char ch;
2149 rStream.ReadChar(ch);
2150 while (true)
2152 if (ch == '\n' || ch == '\r' || rStream.eof())
2154 m_aComment = aBuf.makeStringAndClear();
2156 if (m_aComment.startsWith("%%EOF"))
2158 sal_uInt64 nPos = rStream.Tell();
2159 if (ch == '\r')
2161 rStream.ReadChar(ch);
2162 rStream.SeekRel(-1);
2163 // If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat
2164 // behavior.
2165 if (ch == '\n')
2167 nPos += 1;
2170 m_rDoc.PushBackEOF(nPos);
2173 SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
2174 return true;
2176 aBuf.append(ch);
2177 rStream.ReadChar(ch);
2180 return false;
// Compiler-generated default constructor, defined out of line.
2183 PDFNumberElement::PDFNumberElement() = default;
2185 bool PDFNumberElement::Read(SvStream& rStream)
2187 OStringBuffer aBuf;
2188 m_nOffset = rStream.Tell();
2189 char ch;
2190 rStream.ReadChar(ch);
2191 if (rStream.eof())
2193 return false;
2195 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+' && ch != '.')
2197 rStream.SeekRel(-1);
2198 return false;
2200 while (!rStream.eof())
2202 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+'
2203 && ch != '.')
2205 rStream.SeekRel(-1);
2206 m_nLength = rStream.Tell() - m_nOffset;
2207 m_fValue = o3tl::toDouble(aBuf);
2208 aBuf.setLength(0);
2209 SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2210 return true;
2212 aBuf.append(ch);
2213 rStream.ReadChar(ch);
2216 return false;
2219 sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2221 sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2223 bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2225 bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2227 bool PDFHexStringElement::Read(SvStream& rStream)
2229 char ch;
2230 rStream.ReadChar(ch);
2231 if (ch != '<')
2233 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2234 return false;
2236 rStream.ReadChar(ch);
2238 OStringBuffer aBuf;
2239 while (!rStream.eof())
2241 if (ch == '>')
2243 m_aValue = aBuf.makeStringAndClear();
2244 SAL_INFO("vcl.filter",
2245 "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2246 return true;
2248 aBuf.append(ch);
2249 rStream.ReadChar(ch);
2252 return false;
2255 const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2257 bool PDFLiteralStringElement::Read(SvStream& rStream)
2259 char nPrevCh = 0;
2260 char ch = 0;
2261 rStream.ReadChar(ch);
2262 if (ch != '(')
2264 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2265 return false;
2267 nPrevCh = ch;
2268 rStream.ReadChar(ch);
2270 // Start with 1 nesting level as we read a '(' above already.
2271 int nDepth = 1;
2272 OStringBuffer aBuf;
2273 while (!rStream.eof())
2275 if (ch == '(' && nPrevCh != '\\')
2276 ++nDepth;
2278 if (ch == ')' && nPrevCh != '\\')
2279 --nDepth;
2281 if (nDepth == 0)
2283 // ')' of the outermost '(' is reached.
2284 m_aValue = aBuf.makeStringAndClear();
2285 SAL_INFO("vcl.filter",
2286 "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2287 return true;
2289 aBuf.append(ch);
2290 nPrevCh = ch;
2291 rStream.ReadChar(ch);
2294 return false;
2297 const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2299 PDFTrailerElement::PDFTrailerElement(PDFDocument& rDoc)
2300 : m_rDoc(rDoc)
2301 , m_pDictionaryElement(nullptr)
2305 bool PDFTrailerElement::Read(SvStream& rStream)
2307 m_nOffset = rStream.Tell();
2308 return true;
2311 PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2313 if (!m_pDictionaryElement)
2315 PDFObjectParser aParser(m_rDoc.GetElements());
2316 aParser.parse(this);
2318 if (!m_pDictionaryElement)
2319 return nullptr;
2320 return m_pDictionaryElement->LookupElement(rDictionaryKey);
2323 sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2325 double PDFNumberElement::GetValue() const { return m_fValue; }
2327 PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2328 : m_rDoc(rDoc)
2329 , m_fObjectValue(fObjectValue)
2330 , m_fGenerationValue(fGenerationValue)
2331 , m_pNumberElement(nullptr)
2332 , m_pNameElement(nullptr)
2333 , m_nDictionaryOffset(0)
2334 , m_nDictionaryLength(0)
2335 , m_pDictionaryElement(nullptr)
2336 , m_nArrayOffset(0)
2337 , m_nArrayLength(0)
2338 , m_pArrayElement(nullptr)
2339 , m_pStreamElement(nullptr)
2340 , m_bParsed(false)
2344 bool PDFObjectElement::Read(SvStream& /*rStream*/)
2346 SAL_INFO("vcl.filter",
2347 "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2348 return true;
2351 PDFDictionaryElement::PDFDictionaryElement() = default;
2353 PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2354 const OString& rKey)
2356 auto it = rDictionary.find(rKey);
2357 if (it == rDictionary.end())
2358 return nullptr;
2360 return it->second;
2363 PDFObjectElement* PDFDictionaryElement::LookupObject(const OString& rDictionaryKey)
2365 auto pKey = dynamic_cast<PDFReferenceElement*>(
2366 PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2367 if (!pKey)
2369 SAL_WARN("vcl.filter",
2370 "PDFDictionaryElement::LookupObject: no such key with reference value: "
2371 << rDictionaryKey);
2372 return nullptr;
2375 return pKey->LookupObject();
2378 PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
2380 return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2383 void PDFObjectElement::parseIfNecessary()
2385 if (m_bParsed)
2386 return;
2388 if (!m_aElements.empty())
2390 // This is a stored object in an object stream.
2391 PDFObjectParser aParser(m_aElements);
2392 aParser.parse(this);
2394 else
2396 // Normal object: elements are stored as members of the document itself.
2397 PDFObjectParser aParser(m_rDoc.GetElements());
2398 aParser.parse(this);
2400 m_bParsed = true;
2403 PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2405 parseIfNecessary();
2406 if (!m_pDictionaryElement)
2407 return nullptr;
2408 return PDFDictionaryElement::Lookup(GetDictionaryItems(), rDictionaryKey);
2411 PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
2413 auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2414 if (!pKey)
2416 SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2417 << rDictionaryKey);
2418 return nullptr;
2421 return pKey->LookupObject();
2424 double PDFObjectElement::GetObjectValue() const { return m_fObjectValue; }
2426 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2428 m_nDictionaryOffset = nDictionaryOffset;
2431 sal_uInt64 PDFObjectElement::GetDictionaryOffset()
2433 parseIfNecessary();
2434 return m_nDictionaryOffset;
2437 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2439 sal_uInt64 PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset; }
2441 void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2443 m_aDictionaryKeyOffset[rKey] = nOffset;
2446 void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2448 m_aDictionaryKeyValueLength[rKey] = nLength;
2451 sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2453 auto it = m_aDictionaryKeyOffset.find(rKey);
2454 if (it == m_aDictionaryKeyOffset.end())
2455 return 0;
2457 return it->second;
2460 sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2462 auto it = m_aDictionaryKeyValueLength.find(rKey);
2463 if (it == m_aDictionaryKeyValueLength.end())
2464 return 0;
2466 return it->second;
2469 const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2471 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2473 m_nDictionaryLength = nDictionaryLength;
2476 sal_uInt64 PDFObjectElement::GetDictionaryLength()
2478 parseIfNecessary();
2479 return m_nDictionaryLength;
2482 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2484 sal_uInt64 PDFObjectElement::GetArrayLength() const { return m_nArrayLength; }
2486 PDFDictionaryElement* PDFObjectElement::GetDictionary()
2488 parseIfNecessary();
2489 return m_pDictionaryElement;
2492 void PDFObjectElement::SetDictionary(PDFDictionaryElement* pDictionaryElement)
2494 m_pDictionaryElement = pDictionaryElement;
2497 void PDFObjectElement::SetNumberElement(PDFNumberElement* pNumberElement)
2499 m_pNumberElement = pNumberElement;
2502 PDFNumberElement* PDFObjectElement::GetNumberElement() const { return m_pNumberElement; }
2504 void PDFObjectElement::SetNameElement(PDFNameElement* pNameElement)
2506 m_pNameElement = pNameElement;
2509 PDFNameElement* PDFObjectElement::GetNameElement() const { return m_pNameElement; }
2511 const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2513 return m_aDictionaryReferences;
2516 void PDFObjectElement::AddDictionaryReference(PDFReferenceElement* pReference)
2518 m_aDictionaryReferences.push_back(pReference);
2521 const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2523 parseIfNecessary();
2524 return m_pDictionaryElement->GetItems();
2527 void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2529 void PDFObjectElement::SetStream(PDFStreamElement* pStreamElement)
2531 m_pStreamElement = pStreamElement;
2534 PDFStreamElement* PDFObjectElement::GetStream() const { return m_pStreamElement; }
2536 PDFArrayElement* PDFObjectElement::GetArray()
2538 parseIfNecessary();
2539 return m_pArrayElement;
2542 void PDFObjectElement::ParseStoredObjects()
2544 if (!m_pStreamElement)
2546 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2547 return;
2550 auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"_ostr));
2551 if (!pType || pType->GetValue() != "ObjStm")
2553 if (!pType)
2554 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2555 else
2556 SAL_WARN("vcl.filter",
2557 "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2558 return;
2561 auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"_ostr));
2562 if (!pFilter || pFilter->GetValue() != "FlateDecode")
2564 if (!pFilter)
2565 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2566 else
2567 SAL_WARN("vcl.filter",
2568 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2569 return;
2572 auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"_ostr));
2573 if (!pFirst)
2575 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2576 return;
2579 auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"_ostr));
2580 if (!pN)
2582 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2583 return;
2585 size_t nN = pN->GetValue();
2587 auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"_ostr));
2588 if (!pLength)
2590 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2591 return;
2593 size_t nLength = pLength->GetValue();
2595 // Read and decompress it.
2596 SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2597 rEditBuffer.Seek(m_pStreamElement->GetOffset());
2598 std::vector<char> aBuf(nLength);
2599 rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2600 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2601 SvMemoryStream aStream;
2602 ZCodec aZCodec;
2603 aZCodec.BeginCompression();
2604 aZCodec.Decompress(aSource, aStream);
2605 if (!aZCodec.EndCompression())
2607 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2608 return;
2611 nLength = aStream.TellEnd();
2612 aStream.Seek(0);
2613 std::vector<size_t> aObjNums;
2614 std::vector<size_t> aOffsets;
2615 std::vector<size_t> aLengths;
2616 // First iterate over and find out the lengths.
2617 for (size_t nObject = 0; nObject < nN; ++nObject)
2619 PDFNumberElement aObjNum;
2620 if (!aObjNum.Read(aStream))
2622 SAL_WARN("vcl.filter",
2623 "PDFObjectElement::ParseStoredObjects: failed to read object number");
2624 return;
2626 aObjNums.push_back(aObjNum.GetValue());
2628 PDFDocument::SkipWhitespace(aStream);
2630 PDFNumberElement aByteOffset;
2631 if (!aByteOffset.Read(aStream))
2633 SAL_WARN("vcl.filter",
2634 "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2635 return;
2637 aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2639 if (aOffsets.size() > 1)
2640 aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2641 if (nObject + 1 == nN)
2642 aLengths.push_back(nLength - aOffsets.back());
2644 PDFDocument::SkipWhitespace(aStream);
2647 // Now create streams with the proper length and tokenize the data.
2648 for (size_t nObject = 0; nObject < nN; ++nObject)
2650 size_t nObjNum = aObjNums[nObject];
2651 size_t nOffset = aOffsets[nObject];
2652 size_t nLen = aLengths[nObject];
2654 aStream.Seek(nOffset);
2655 m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2656 PDFObjectElement* pStored = m_aStoredElements.back().get();
2658 aBuf.clear();
2659 aBuf.resize(nLen);
2660 aStream.ReadBytes(aBuf.data(), aBuf.size());
2661 SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2663 m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(),
2664 pStored);
2665 // This is how references know the object is stored inside this object stream.
2666 m_rDoc.SetIDObject(nObjNum, pStored);
2668 // Store the stream of the object in the object stream for later use.
2669 std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2670 aStoredStream.Seek(0);
2671 pStreamBuffer->WriteStream(aStoredStream);
2672 pStored->SetStreamBuffer(pStreamBuffer);
2676 std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2678 return m_aElements;
2681 SvMemoryStream* PDFObjectElement::GetStreamBuffer() const { return m_pStreamBuffer.get(); }
2683 void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2685 m_pStreamBuffer = std::move(pStreamBuffer);
2688 PDFDocument& PDFObjectElement::GetDocument() { return m_rDoc; }
2690 PDFReferenceElement::PDFReferenceElement(PDFDocument& rDoc, PDFNumberElement& rObject,
2691 PDFNumberElement const& rGeneration)
2692 : m_rDoc(rDoc)
2693 , m_fObjectValue(rObject.GetValue())
2694 , m_fGenerationValue(rGeneration.GetValue())
2695 , m_rObject(rObject)
2699 PDFNumberElement& PDFReferenceElement::GetObjectElement() const { return m_rObject; }
2701 bool PDFReferenceElement::Read(SvStream& rStream)
2703 SAL_INFO("vcl.filter",
2704 "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2705 m_nOffset = rStream.Tell();
2706 return true;
2709 sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2711 double PDFReferenceElement::LookupNumber(SvStream& rStream) const
2713 size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2714 if (nOffset == 0)
2716 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2717 << m_fObjectValue);
2718 return 0;
2721 sal_uInt64 nOrigPos = rStream.Tell();
2722 comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2724 rStream.Seek(nOffset);
2726 PDFDocument::SkipWhitespace(rStream);
2727 PDFNumberElement aNumber;
2728 bool bRet = aNumber.Read(rStream);
2729 if (!bRet || aNumber.GetValue() != m_fObjectValue)
2731 SAL_WARN("vcl.filter",
2732 "PDFReferenceElement::LookupNumber: offset points to not matching object");
2733 return 0;
2738 PDFDocument::SkipWhitespace(rStream);
2739 PDFNumberElement aNumber;
2740 bool bRet = aNumber.Read(rStream);
2741 if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2743 SAL_WARN("vcl.filter",
2744 "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2745 return 0;
2750 PDFDocument::SkipWhitespace(rStream);
2751 OString aKeyword = PDFDocument::ReadKeyword(rStream);
2752 if (aKeyword != "obj")
2754 SAL_WARN("vcl.filter",
2755 "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2756 return 0;
2760 PDFDocument::SkipWhitespace(rStream);
2761 PDFNumberElement aNumber;
2762 if (!aNumber.Read(rStream))
2764 SAL_WARN("vcl.filter",
2765 "PDFReferenceElement::LookupNumber: failed to read referenced number");
2766 return 0;
2769 return aNumber.GetValue();
2772 PDFObjectElement* PDFReferenceElement::LookupObject()
2774 return m_rDoc.LookupObject(m_fObjectValue);
2777 PDFObjectElement* PDFDocument::LookupObject(size_t nObjectNumber)
2779 auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2781 if (itIDObjects != m_aIDObjects.end())
2782 return itIDObjects->second;
2784 SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2785 return nullptr;
2788 SvMemoryStream& PDFDocument::GetEditBuffer() { return m_aEditBuffer; }
2790 int PDFReferenceElement::GetObjectValue() const { return m_fObjectValue; }
2792 int PDFReferenceElement::GetGenerationValue() const { return m_fGenerationValue; }
2794 bool PDFDictionaryElement::Read(SvStream& rStream)
2796 char ch;
2797 rStream.ReadChar(ch);
2798 if (ch != '<')
2800 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2801 return false;
2804 if (rStream.eof())
2806 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2807 return false;
2810 rStream.ReadChar(ch);
2811 if (ch != '<')
2813 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2814 return false;
2817 m_nLocation = rStream.Tell();
2819 SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2821 return true;
2824 PDFEndDictionaryElement::PDFEndDictionaryElement() = default;
2826 sal_uInt64 PDFEndDictionaryElement::GetLocation() const { return m_nLocation; }
2828 bool PDFEndDictionaryElement::Read(SvStream& rStream)
2830 m_nLocation = rStream.Tell();
2831 char ch;
2832 rStream.ReadChar(ch);
2833 if (ch != '>')
2835 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2836 return false;
2839 if (rStream.eof())
2841 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2842 return false;
2845 rStream.ReadChar(ch);
2846 if (ch != '>')
2848 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2849 return false;
2852 SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2854 return true;
2857 PDFNameElement::PDFNameElement() = default;
2859 bool PDFNameElement::Read(SvStream& rStream)
2861 char ch;
2862 rStream.ReadChar(ch);
2863 if (ch != '/')
2865 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2866 return false;
2868 m_nLocation = rStream.Tell();
2870 if (rStream.eof())
2872 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2873 return false;
2876 // Read till the first white-space.
2877 OStringBuffer aBuf;
2878 rStream.ReadChar(ch);
2879 while (!rStream.eof())
2881 if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
2882 || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2884 rStream.SeekRel(-1);
2885 m_aValue = aBuf.makeStringAndClear();
2886 SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2887 return true;
2889 aBuf.append(ch);
2890 rStream.ReadChar(ch);
2893 return false;
2896 const OString& PDFNameElement::GetValue() const { return m_aValue; }
2898 sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
2900 PDFStreamElement::PDFStreamElement(size_t nLength)
2901 : m_nLength(nLength)
2902 , m_nOffset(0)
2906 bool PDFStreamElement::Read(SvStream& rStream)
2908 SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
2909 m_nOffset = rStream.Tell();
2910 std::vector<unsigned char> aBytes(m_nLength);
2911 rStream.ReadBytes(aBytes.data(), aBytes.size());
2912 m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
2914 return rStream.good();
2917 SvMemoryStream& PDFStreamElement::GetMemory() { return m_aMemory; }
2919 sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
2921 bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
2923 bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
2925 PDFArrayElement::PDFArrayElement(PDFObjectElement* pObject)
2926 : m_pObject(pObject)
2930 bool PDFArrayElement::Read(SvStream& rStream)
2932 char ch;
2933 rStream.ReadChar(ch);
2934 if (ch != '[')
2936 SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
2937 return false;
2940 SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2942 return true;
2945 void PDFArrayElement::PushBack(PDFElement* pElement)
2947 if (m_pObject)
2948 SAL_INFO("vcl.filter",
2949 "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
2950 m_aElements.push_back(pElement);
2953 const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
2955 PDFEndArrayElement::PDFEndArrayElement() = default;
2957 bool PDFEndArrayElement::Read(SvStream& rStream)
2959 m_nOffset = rStream.Tell();
2960 char ch;
2961 rStream.ReadChar(ch);
2962 if (ch != ']')
2964 SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
2965 return false;
2968 SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
2970 return true;
2973 sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
2975 // PDFObjectParser
2977 size_t PDFObjectParser::parse(PDFElement* pParsingElement, size_t nStartIndex, int nCurrentDepth)
2979 // The index of last parsed element
2980 size_t nReturnIndex = 0;
2982 pParsingElement->setParsing(true);
2984 comphelper::ScopeGuard aGuard([pParsingElement]() { pParsingElement->setParsing(false); });
2986 // Current object, if root is an object, else nullptr
2987 auto pParsingObject = dynamic_cast<PDFObjectElement*>(pParsingElement);
2988 auto pParsingTrailer = dynamic_cast<PDFTrailerElement*>(pParsingElement);
2990 // Current dictionary, if root is an dictionary, else nullptr
2991 auto pParsingDictionary = dynamic_cast<PDFDictionaryElement*>(pParsingElement);
2993 // Current parsing array, if root is an array, else nullptr
2994 auto pParsingArray = dynamic_cast<PDFArrayElement*>(pParsingElement);
2996 // Find out where the dictionary for this object starts.
2997 size_t nIndex = nStartIndex;
2998 for (size_t i = nStartIndex; i < mrElements.size(); ++i)
3000 if (mrElements[i].get() == pParsingElement)
3002 nIndex = i;
3003 break;
3007 OString aName;
3008 sal_uInt64 nNameOffset = 0;
3009 std::vector<PDFNumberElement*> aNumbers;
3011 sal_uInt64 nDictionaryOffset = 0;
3013 // Current depth; 1 is current
3014 int nDepth = 0;
3016 for (size_t i = nIndex; i < mrElements.size(); ++i)
3018 auto* pCurrentElement = mrElements[i].get();
3020 // Dictionary tokens can be nested, track enter/leave.
3021 if (auto pCurrentDictionary = dynamic_cast<PDFDictionaryElement*>(pCurrentElement))
3023 // Handle previously stored number
3024 if (!aNumbers.empty())
3026 if (pParsingDictionary)
3028 PDFNumberElement* pNumber = aNumbers.back();
3029 sal_uInt64 nLength
3030 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3032 pParsingDictionary->insert(aName, pNumber);
3033 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3034 pParsingDictionary->SetKeyValueLength(aName, nLength);
3036 else if (pParsingArray)
3038 for (auto& pNumber : aNumbers)
3039 pParsingArray->PushBack(pNumber);
3041 else
3043 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3045 aName.clear();
3046 aNumbers.clear();
3049 nDepth++;
3051 if (nDepth == 1) // pParsingDictionary is the current one
3053 // First dictionary start, track start offset.
3054 nDictionaryOffset = pCurrentDictionary->GetLocation();
3056 if (pParsingObject)
3058 // Then the toplevel dictionary of the object.
3059 pParsingObject->SetDictionary(pCurrentDictionary);
3060 pParsingObject->SetDictionaryOffset(nDictionaryOffset);
3061 pParsingDictionary = pCurrentDictionary;
3063 else if (pParsingTrailer)
3065 pParsingTrailer->SetDictionary(pCurrentDictionary);
3066 pParsingDictionary = pCurrentDictionary;
3069 else if (!pCurrentDictionary->alreadyParsing())
3071 if (pParsingArray)
3073 pParsingArray->PushBack(pCurrentDictionary);
3075 else if (pParsingDictionary)
3077 // Dictionary toplevel value.
3078 pParsingDictionary->insert(aName, pCurrentDictionary);
3080 else
3082 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3084 // Nested dictionary.
3085 const size_t nNextElementIndex = parse(pCurrentDictionary, i, nCurrentDepth + 1);
3086 i = std::max(i, nNextElementIndex - 1);
3089 else if (auto pCurrentEndDictionary
3090 = dynamic_cast<PDFEndDictionaryElement*>(pCurrentElement))
3092 // Handle previously stored number
3093 if (!aNumbers.empty())
3095 if (pParsingDictionary)
3097 PDFNumberElement* pNumber = aNumbers.back();
3098 sal_uInt64 nLength
3099 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3101 pParsingDictionary->insert(aName, pNumber);
3102 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3103 pParsingDictionary->SetKeyValueLength(aName, nLength);
3105 else if (pParsingArray)
3107 for (auto& pNumber : aNumbers)
3108 pParsingArray->PushBack(pNumber);
3110 else
3112 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3114 aName.clear();
3115 aNumbers.clear();
3118 if (pParsingDictionary)
3120 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3121 sal_uInt64 nLength = pCurrentEndDictionary->GetLocation() - nNameOffset + 2;
3122 pParsingDictionary->SetKeyValueLength(aName, nLength);
3123 aName.clear();
3126 if (nDepth == 1) // did the parsing ended
3128 // Last dictionary end, track length and stop parsing.
3129 if (pParsingObject)
3131 sal_uInt64 nDictionaryLength
3132 = pCurrentEndDictionary->GetLocation() - nDictionaryOffset;
3133 pParsingObject->SetDictionaryLength(nDictionaryLength);
3135 nReturnIndex = i;
3136 break;
3139 nDepth--;
3141 else if (auto pCurrentArray = dynamic_cast<PDFArrayElement*>(pCurrentElement))
3143 // Handle previously stored number
3144 if (!aNumbers.empty())
3146 if (pParsingDictionary)
3148 PDFNumberElement* pNumber = aNumbers.back();
3150 sal_uInt64 nLength
3151 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3152 pParsingDictionary->insert(aName, pNumber);
3153 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3154 pParsingDictionary->SetKeyValueLength(aName, nLength);
3156 else if (pParsingArray)
3158 for (auto& pNumber : aNumbers)
3159 pParsingArray->PushBack(pNumber);
3161 else
3163 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3165 aName.clear();
3166 aNumbers.clear();
3169 nDepth++;
3170 if (nDepth == 1) // pParsingDictionary is the current one
3172 if (pParsingObject)
3174 pParsingObject->SetArray(pCurrentArray);
3175 pParsingArray = pCurrentArray;
3178 else if (!pCurrentArray->alreadyParsing())
3180 if (pParsingArray)
3182 // Array is toplevel
3183 pParsingArray->PushBack(pCurrentArray);
3185 else if (pParsingDictionary)
3187 // Dictionary toplevel value.
3188 pParsingDictionary->insert(aName, pCurrentArray);
3191 const size_t nNextElementIndex = parse(pCurrentArray, i, nCurrentDepth + 1);
3193 // ensure we go forwards and not endlessly loop
3194 i = std::max(i, nNextElementIndex - 1);
3197 else if (auto pCurrentEndArray = dynamic_cast<PDFEndArrayElement*>(pCurrentElement))
3199 // Handle previously stored number
3200 if (!aNumbers.empty())
3202 if (pParsingDictionary)
3204 PDFNumberElement* pNumber = aNumbers.back();
3206 sal_uInt64 nLength
3207 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3208 pParsingDictionary->insert(aName, pNumber);
3209 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3210 pParsingDictionary->SetKeyValueLength(aName, nLength);
3212 else if (pParsingArray)
3214 for (auto& pNumber : aNumbers)
3215 pParsingArray->PushBack(pNumber);
3217 else
3219 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3221 aName.clear();
3222 aNumbers.clear();
3225 if (nDepth == 1) // did the pParsing ended
3227 // Last array end, track length and stop parsing.
3228 nReturnIndex = i;
3229 break;
3232 if (pParsingDictionary)
3234 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3235 // Include the ending ']' in the length of the key - (array)value pair length.
3236 sal_uInt64 nLength = pCurrentEndArray->GetOffset() - nNameOffset + 1;
3237 pParsingDictionary->SetKeyValueLength(aName, nLength);
3238 aName.clear();
3240 nDepth--;
3242 else if (auto pCurrentName = dynamic_cast<PDFNameElement*>(pCurrentElement))
3244 // Handle previously stored number
3245 if (!aNumbers.empty())
3247 if (pParsingDictionary)
3249 PDFNumberElement* pNumber = aNumbers.back();
3251 sal_uInt64 nLength
3252 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3253 pParsingDictionary->insert(aName, pNumber);
3254 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3255 pParsingDictionary->SetKeyValueLength(aName, nLength);
3257 else if (pParsingArray)
3259 for (auto& pNumber : aNumbers)
3260 pParsingArray->PushBack(pNumber);
3262 aName.clear();
3263 aNumbers.clear();
3266 // Now handle name
3267 if (pParsingArray)
3269 // if we are in an array, just push the name to array
3270 pParsingArray->PushBack(pCurrentName);
3272 else if (pParsingDictionary)
3274 // if we are in a dictionary, we need to store the name as a possible key
3275 if (aName.isEmpty())
3277 aName = pCurrentName->GetValue();
3278 nNameOffset = pCurrentName->GetLocation();
3280 else
3282 sal_uInt64 nKeyLength
3283 = pCurrentName->GetLocation() + pCurrentName->GetLength() - nNameOffset;
3284 pParsingDictionary->insert(aName, pCurrentName);
3285 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3286 pParsingDictionary->SetKeyValueLength(aName, nKeyLength);
3287 aName.clear();
3291 else if (auto pReference = dynamic_cast<PDFReferenceElement*>(pCurrentElement))
3293 // Handle previously stored number
3294 if (aNumbers.size() > 2)
3296 aNumbers.resize(aNumbers.size() - 2);
3297 if (pParsingArray)
3299 for (auto& pNumber : aNumbers)
3300 pParsingArray->PushBack(pNumber);
3302 aNumbers.clear();
3305 if (pParsingArray)
3307 pParsingArray->PushBack(pReference);
3309 else if (pParsingDictionary)
3311 sal_uInt64 nLength = pReference->GetOffset() - nNameOffset;
3312 pParsingDictionary->insert(aName, pReference);
3313 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3314 pParsingDictionary->SetKeyValueLength(aName, nLength);
3315 aName.clear();
3317 else
3319 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3321 aNumbers.clear();
3323 else if (auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(pCurrentElement))
3325 // Handle previously stored number
3326 if (!aNumbers.empty())
3328 if (pParsingArray)
3330 for (auto& pNumber : aNumbers)
3331 pParsingArray->PushBack(pNumber);
3333 aNumbers.clear();
3336 if (pParsingArray)
3338 pParsingArray->PushBack(pLiteralString);
3340 else if (pParsingDictionary)
3342 pParsingDictionary->insert(aName, pLiteralString);
3343 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3344 aName.clear();
3346 else
3348 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3351 else if (auto pBoolean = dynamic_cast<PDFBooleanElement*>(pCurrentElement))
3353 // Handle previously stored number
3354 if (!aNumbers.empty())
3356 if (pParsingArray)
3358 for (auto& pNumber : aNumbers)
3359 pParsingArray->PushBack(pNumber);
3361 aNumbers.clear();
3364 if (pParsingArray)
3366 pParsingArray->PushBack(pBoolean);
3368 else if (pParsingDictionary)
3370 pParsingDictionary->insert(aName, pBoolean);
3371 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3372 aName.clear();
3374 else
3376 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3379 else if (auto pHexString = dynamic_cast<PDFHexStringElement*>(pCurrentElement))
3381 // Handle previously stored number
3382 if (!aNumbers.empty())
3384 if (pParsingArray)
3386 for (auto& pNumber : aNumbers)
3387 pParsingArray->PushBack(pNumber);
3389 aNumbers.clear();
3392 if (pParsingArray)
3394 pParsingArray->PushBack(pHexString);
3396 else if (pParsingDictionary)
3398 pParsingDictionary->insert(aName, pHexString);
3399 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3400 aName.clear();
3403 else if (auto pNumberElement = dynamic_cast<PDFNumberElement*>(pCurrentElement))
3405 // Just remember this, so that in case it's not a reference parameter,
3406 // we can handle it later.
3407 aNumbers.push_back(pNumberElement);
3409 else if (dynamic_cast<PDFEndObjectElement*>(pCurrentElement))
3411 // parsing of the object is finished
3412 break;
3414 else if (dynamic_cast<PDFObjectElement*>(pCurrentElement)
3415 || dynamic_cast<PDFTrailerElement*>(pCurrentElement))
3417 continue;
3419 else
3421 SAL_INFO("vcl.filter", "Unhandled element while parsing.");
3425 return nReturnIndex;
3428 } // namespace vcl
3430 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */