bump product version to 7.2.5.1
[LibreOffice.git] / vcl / source / filter / ipdf / pdfdocument.cxx
blobf5fc63be558fce99275bc4b6a7fcc58d4c159bb3
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <vcl/filter/pdfdocument.hxx>
12 #include <map>
13 #include <memory>
14 #include <vector>
16 #include <com/sun/star/uno/Sequence.hxx>
17 #include <com/sun/star/security/XCertificate.hpp>
19 #include <comphelper/scopeguard.hxx>
20 #include <comphelper/string.hxx>
21 #include <rtl/character.hxx>
22 #include <rtl/strbuf.hxx>
23 #include <rtl/string.hxx>
24 #include <sal/log.hxx>
25 #include <sal/types.h>
26 #include <svl/cryptosign.hxx>
27 #include <tools/zcodec.hxx>
28 #include <vcl/pdfwriter.hxx>
29 #include <o3tl/safeint.hxx>
31 #include <pdf/objectcopier.hxx>
33 using namespace com::sun::star;
35 namespace vcl::filter
37 XRefEntry::XRefEntry() = default;
39 PDFDocument::PDFDocument() = default;
41 PDFDocument::~PDFDocument() = default;
43 bool PDFDocument::RemoveSignature(size_t nPosition)
45 std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
46 if (nPosition >= aSignatures.size())
48 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
49 return false;
52 if (aSignatures.size() != m_aEOFs.size() - 1)
54 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
55 "and incremental updates");
56 return false;
59 // The EOF offset is the end of the original file, without the signature at
60 // nPosition.
61 m_aEditBuffer.Seek(m_aEOFs[nPosition]);
62 // Drop all bytes after the current position.
63 m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1);
65 return m_aEditBuffer.good();
68 sal_Int32 PDFDocument::createObject()
70 sal_Int32 nObject = m_aXRef.size();
71 m_aXRef[nObject] = XRefEntry();
72 return nObject;
75 bool PDFDocument::updateObject(sal_Int32 nObject)
77 if (o3tl::make_unsigned(nObject) >= m_aXRef.size())
79 SAL_WARN("vcl.filter", "PDFDocument::updateObject: invalid nObject");
80 return false;
83 XRefEntry aEntry;
84 aEntry.SetOffset(m_aEditBuffer.Tell());
85 aEntry.SetDirty(true);
86 m_aXRef[nObject] = aEntry;
87 return true;
90 bool PDFDocument::writeBuffer(const void* pBuffer, sal_uInt64 nBytes)
92 std::size_t nWritten = m_aEditBuffer.WriteBytes(pBuffer, nBytes);
93 return nWritten == nBytes;
96 void PDFDocument::SetSignatureLine(const std::vector<sal_Int8>& rSignatureLine)
98 m_aSignatureLine = rSignatureLine;
101 void PDFDocument::SetSignaturePage(size_t nPage) { m_nSignaturePage = nPage; }
103 sal_uInt32 PDFDocument::GetNextSignature()
105 sal_uInt32 nRet = 0;
106 for (const auto& pSignature : GetSignatureWidgets())
108 auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
109 if (!pT)
110 continue;
112 const OString& rValue = pT->GetValue();
113 const OString aPrefix = "Signature";
114 if (!rValue.startsWith(aPrefix))
115 continue;
117 nRet = std::max(nRet, rValue.copy(aPrefix.getLength()).toUInt32());
120 return nRet + 1;
123 sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES,
124 sal_uInt64& rLastByteRangeOffset,
125 sal_Int64& rContentOffset)
127 // Write signature object.
128 sal_Int32 nSignatureId = m_aXRef.size();
129 XRefEntry aSignatureEntry;
130 aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
131 aSignatureEntry.SetDirty(true);
132 m_aXRef[nSignatureId] = aSignatureEntry;
133 OStringBuffer aSigBuffer;
134 aSigBuffer.append(nSignatureId);
135 aSigBuffer.append(" 0 obj\n");
136 aSigBuffer.append("<</Contents <");
137 rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
138 // Reserve space for the PKCS#7 object.
139 OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
140 comphelper::string::padToLength(aContentFiller, MAX_SIGNATURE_CONTENT_LENGTH, '0');
141 aSigBuffer.append(aContentFiller.makeStringAndClear());
142 aSigBuffer.append(">\n/Type/Sig/SubFilter");
143 if (bAdES)
144 aSigBuffer.append("/ETSI.CAdES.detached");
145 else
146 aSigBuffer.append("/adbe.pkcs7.detached");
148 // Time of signing.
149 aSigBuffer.append(" /M (");
150 aSigBuffer.append(vcl::PDFWriter::GetDateTime());
151 aSigBuffer.append(")");
153 // Byte range: we can write offset1-length1 and offset2 right now, will
154 // write length2 later.
155 aSigBuffer.append(" /ByteRange [ 0 ");
156 // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
157 aSigBuffer.append(rContentOffset - 1);
158 aSigBuffer.append(" ");
159 aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
160 aSigBuffer.append(" ");
161 rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
162 // We don't know how many bytes we need for the last ByteRange value, this
163 // should be enough.
164 OStringBuffer aByteRangeFiller;
165 comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
166 aSigBuffer.append(aByteRangeFiller.makeStringAndClear());
167 // Finish the Sig obj.
168 aSigBuffer.append(" /Filter/Adobe.PPKMS");
170 if (!rDescription.isEmpty())
172 aSigBuffer.append("/Reason<");
173 vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
174 aSigBuffer.append(">");
177 aSigBuffer.append(" >>\nendobj\n\n");
178 m_aEditBuffer.WriteOString(aSigBuffer.toString());
180 return nSignatureId;
183 sal_Int32 PDFDocument::WriteAppearanceObject(tools::Rectangle& rSignatureRectangle)
185 PDFDocument aPDFDocument;
186 filter::PDFObjectElement* pPage = nullptr;
187 std::vector<filter::PDFObjectElement*> aContentStreams;
189 if (!m_aSignatureLine.empty())
191 // Parse the PDF data of signature line: we can set the signature rectangle to non-empty
192 // based on it.
193 SvMemoryStream aPDFStream;
194 aPDFStream.WriteBytes(m_aSignatureLine.data(), m_aSignatureLine.size());
195 aPDFStream.Seek(0);
196 if (!aPDFDocument.Read(aPDFStream))
198 SAL_WARN("vcl.filter",
199 "PDFDocument::WriteAppearanceObject: failed to read the PDF document");
200 return -1;
203 std::vector<filter::PDFObjectElement*> aPages = aPDFDocument.GetPages();
204 if (aPages.empty())
206 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no pages");
207 return -1;
210 pPage = aPages[0];
211 if (!pPage)
213 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no page");
214 return -1;
217 // Calculate the bounding box.
218 PDFElement* pMediaBox = pPage->Lookup("MediaBox");
219 auto pMediaBoxArray = dynamic_cast<PDFArrayElement*>(pMediaBox);
220 if (!pMediaBoxArray || pMediaBoxArray->GetElements().size() < 4)
222 SAL_WARN("vcl.filter",
223 "PDFDocument::WriteAppearanceObject: MediaBox is not an array of 4");
224 return -1;
226 const std::vector<PDFElement*>& rMediaBoxElements = pMediaBoxArray->GetElements();
227 auto pWidth = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[2]);
228 if (!pWidth)
230 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no width");
231 return -1;
233 rSignatureRectangle.setWidth(pWidth->GetValue());
234 auto pHeight = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[3]);
235 if (!pHeight)
237 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no height");
238 return -1;
240 rSignatureRectangle.setHeight(pHeight->GetValue());
242 if (PDFObjectElement* pContentStream = pPage->LookupObject("Contents"))
244 aContentStreams.push_back(pContentStream);
247 if (aContentStreams.empty())
249 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no content stream");
250 return -1;
253 m_aSignatureLine.clear();
255 // Write appearance object: allocate an ID.
256 sal_Int32 nAppearanceId = m_aXRef.size();
257 m_aXRef[nAppearanceId] = XRefEntry();
259 // Write the object content.
260 SvMemoryStream aEditBuffer;
261 aEditBuffer.WriteUInt32AsString(nAppearanceId);
262 aEditBuffer.WriteCharPtr(" 0 obj\n");
263 aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
265 PDFObjectCopier aCopier(*this);
266 if (!aContentStreams.empty())
268 assert(pPage && "aContentStreams is only filled if there was a pPage");
269 OStringBuffer aBuffer;
270 aCopier.copyPageResources(pPage, aBuffer);
271 aEditBuffer.WriteOString(aBuffer.makeStringAndClear());
274 aEditBuffer.WriteCharPtr("/BBox[0 0 ");
275 aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getWidth()));
276 aEditBuffer.WriteCharPtr(" ");
277 aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getHeight()));
278 aEditBuffer.WriteCharPtr("]\n/Length ");
280 // Add the object to the doc-level edit buffer and update the offset.
281 SvMemoryStream aStream;
282 bool bCompressed = false;
283 sal_Int32 nLength = 0;
284 if (!aContentStreams.empty())
286 nLength = PDFObjectCopier::copyPageStreams(aContentStreams, aStream, bCompressed);
288 aEditBuffer.WriteOString(OString::number(nLength));
289 if (bCompressed)
291 aEditBuffer.WriteOString(" /Filter/FlateDecode");
294 aEditBuffer.WriteCharPtr("\n>>\n");
296 aEditBuffer.WriteCharPtr("stream\n");
298 // Copy the original page streams to the form XObject stream.
299 aStream.Seek(0);
300 aEditBuffer.WriteStream(aStream);
302 aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
304 aEditBuffer.Seek(0);
305 XRefEntry aAppearanceEntry;
306 aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
307 aAppearanceEntry.SetDirty(true);
308 m_aXRef[nAppearanceId] = aAppearanceEntry;
309 m_aEditBuffer.WriteStream(aEditBuffer);
311 return nAppearanceId;
314 sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
315 sal_Int32 nAppearanceId,
316 const tools::Rectangle& rSignatureRectangle)
318 // Decide what identifier to use for the new signature.
319 sal_uInt32 nNextSignature = GetNextSignature();
321 // Write the Annot object, references nSignatureId and nAppearanceId.
322 sal_Int32 nAnnotId = m_aXRef.size();
323 XRefEntry aAnnotEntry;
324 aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
325 aAnnotEntry.SetDirty(true);
326 m_aXRef[nAnnotId] = aAnnotEntry;
327 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
328 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
329 m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
330 m_aEditBuffer.WriteCharPtr("/Rect[0 0 ");
331 m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getWidth()));
332 m_aEditBuffer.WriteCharPtr(" ");
333 m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getHeight()));
334 m_aEditBuffer.WriteCharPtr("]\n");
335 m_aEditBuffer.WriteCharPtr("/FT/Sig\n");
336 m_aEditBuffer.WriteCharPtr("/P ");
337 m_aEditBuffer.WriteUInt32AsString(rFirstPage.GetObjectValue());
338 m_aEditBuffer.WriteCharPtr(" 0 R\n");
339 m_aEditBuffer.WriteCharPtr("/T(Signature");
340 m_aEditBuffer.WriteUInt32AsString(nNextSignature);
341 m_aEditBuffer.WriteCharPtr(")\n");
342 m_aEditBuffer.WriteCharPtr("/V ");
343 m_aEditBuffer.WriteUInt32AsString(nSignatureId);
344 m_aEditBuffer.WriteCharPtr(" 0 R\n");
345 m_aEditBuffer.WriteCharPtr("/DV ");
346 m_aEditBuffer.WriteUInt32AsString(nSignatureId);
347 m_aEditBuffer.WriteCharPtr(" 0 R\n");
348 m_aEditBuffer.WriteCharPtr("/AP<<\n/N ");
349 m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
350 m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n");
351 m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
353 return nAnnotId;
356 bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
358 PDFElement* pAnnots = rFirstPage.Lookup("Annots");
359 auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
360 if (pAnnotsReference)
362 // Write the updated Annots key of the Page object.
363 PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
364 if (!pAnnotsObject)
366 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
367 return false;
370 sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
371 m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
372 m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
373 m_aXRef[nAnnotsId].SetDirty(true);
374 m_aEditBuffer.WriteUInt32AsString(nAnnotsId);
375 m_aEditBuffer.WriteCharPtr(" 0 obj\n[");
377 // Write existing references.
378 PDFArrayElement* pArray = pAnnotsObject->GetArray();
379 if (!pArray)
381 SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
382 return false;
385 for (size_t i = 0; i < pArray->GetElements().size(); ++i)
387 auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
388 if (!pReference)
389 continue;
391 if (i)
392 m_aEditBuffer.WriteCharPtr(" ");
393 m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue());
394 m_aEditBuffer.WriteCharPtr(" 0 R");
396 // Write our reference.
397 m_aEditBuffer.WriteCharPtr(" ");
398 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
399 m_aEditBuffer.WriteCharPtr(" 0 R");
401 m_aEditBuffer.WriteCharPtr("]\nendobj\n\n");
403 else
405 // Write the updated first page object, references nAnnotId.
406 sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
407 if (nFirstPageId >= m_aXRef.size())
409 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
410 return false;
412 m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
413 m_aXRef[nFirstPageId].SetDirty(true);
414 m_aEditBuffer.WriteUInt32AsString(nFirstPageId);
415 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
416 m_aEditBuffer.WriteCharPtr("<<");
417 auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
418 if (!pAnnotsArray)
420 // No Annots key, just write the key with a single reference.
421 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
422 + rFirstPage.GetDictionaryOffset(),
423 rFirstPage.GetDictionaryLength());
424 m_aEditBuffer.WriteCharPtr("/Annots[");
425 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
426 m_aEditBuffer.WriteCharPtr(" 0 R]");
428 else
430 // Annots key is already there, insert our reference at the end.
431 PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
433 // Offset right before the end of the Annots array.
434 sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots")
435 + pDictionary->GetKeyValueLength("Annots") - 1;
436 // Length of beginning of the dictionary -> Annots end.
437 sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
438 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
439 + rFirstPage.GetDictionaryOffset(),
440 nAnnotsBeforeEndLength);
441 m_aEditBuffer.WriteCharPtr(" ");
442 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
443 m_aEditBuffer.WriteCharPtr(" 0 R");
444 // Length of Annots end -> end of the dictionary.
445 sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
446 + rFirstPage.GetDictionaryLength()
447 - nAnnotsEndOffset;
448 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
449 + nAnnotsEndOffset,
450 nAnnotsAfterEndLength);
452 m_aEditBuffer.WriteCharPtr(">>");
453 m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
456 return true;
459 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot)
461 if (m_pXRefStream)
462 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
463 else
465 if (!m_pTrailer)
467 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
468 return false;
470 pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
472 if (!pRoot)
474 SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
475 return false;
477 PDFObjectElement* pCatalog = pRoot->LookupObject();
478 if (!pCatalog)
480 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
481 return false;
483 sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
484 if (nCatalogId >= m_aXRef.size())
486 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
487 return false;
489 PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
490 auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
491 if (pAcroFormReference)
493 // Write the updated AcroForm key of the Catalog object.
494 PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
495 if (!pAcroFormObject)
497 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
498 return false;
501 sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
502 m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
503 m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
504 m_aXRef[nAcroFormId].SetDirty(true);
505 m_aEditBuffer.WriteUInt32AsString(nAcroFormId);
506 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
508 // If this is nullptr, then the AcroForm object is not in an object stream.
509 SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
511 if (!pAcroFormObject->Lookup("Fields"))
513 SAL_WARN("vcl.filter",
514 "PDFDocument::Sign: AcroForm object without required Fields key");
515 return false;
518 PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
519 if (!pAcroFormDictionary)
521 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
522 return false;
525 // Offset right before the end of the Fields array.
526 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
527 + pAcroFormDictionary->GetKeyValueLength("Fields")
528 - strlen("]");
530 // Length of beginning of the object dictionary -> Fields end.
531 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
532 if (pStreamBuffer)
533 m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
534 else
536 nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
537 m_aEditBuffer.WriteCharPtr("<<");
538 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
539 + pAcroFormObject->GetDictionaryOffset(),
540 nFieldsBeforeEndLength);
543 // Append our reference at the end of the Fields array.
544 m_aEditBuffer.WriteCharPtr(" ");
545 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
546 m_aEditBuffer.WriteCharPtr(" 0 R");
548 // Length of Fields end -> end of the object dictionary.
549 if (pStreamBuffer)
551 sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
552 m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
553 + nFieldsEndOffset,
554 nFieldsAfterEndLength);
556 else
558 sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
559 + pAcroFormObject->GetDictionaryLength()
560 - nFieldsEndOffset;
561 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
562 + nFieldsEndOffset,
563 nFieldsAfterEndLength);
564 m_aEditBuffer.WriteCharPtr(">>");
567 m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
569 else
571 // Write the updated Catalog object, references nAnnotId.
572 auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
573 m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
574 m_aXRef[nCatalogId].SetDirty(true);
575 m_aEditBuffer.WriteUInt32AsString(nCatalogId);
576 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
577 m_aEditBuffer.WriteCharPtr("<<");
578 if (!pAcroFormDictionary)
580 // No AcroForm key, assume no signatures.
581 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
582 + pCatalog->GetDictionaryOffset(),
583 pCatalog->GetDictionaryLength());
584 m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n");
585 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
586 m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
588 else
590 // AcroForm key is already there, insert our reference at the Fields end.
591 auto it = pAcroFormDictionary->GetItems().find("Fields");
592 if (it == pAcroFormDictionary->GetItems().end())
594 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
595 return false;
598 auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
599 if (!pFields)
601 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
602 return false;
605 // Offset right before the end of the Fields array.
606 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
607 + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
608 // Length of beginning of the Catalog dictionary -> Fields end.
609 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
610 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
611 + pCatalog->GetDictionaryOffset(),
612 nFieldsBeforeEndLength);
613 m_aEditBuffer.WriteCharPtr(" ");
614 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
615 m_aEditBuffer.WriteCharPtr(" 0 R");
616 // Length of Fields end -> end of the Catalog dictionary.
617 sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
618 + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
619 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
620 + nFieldsEndOffset,
621 nFieldsAfterEndLength);
623 m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
626 return true;
629 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
631 if (m_pXRefStream)
633 // Write the xref stream.
634 // This is a bit meta: the xref stream stores its own offset.
635 sal_Int32 nXRefStreamId = m_aXRef.size();
636 XRefEntry aXRefStreamEntry;
637 aXRefStreamEntry.SetOffset(nXRefOffset);
638 aXRefStreamEntry.SetDirty(true);
639 m_aXRef[nXRefStreamId] = aXRefStreamEntry;
641 // Write stream data.
642 SvMemoryStream aXRefStream;
643 const size_t nOffsetLen = 3;
644 // 3 additional bytes: predictor, the first and the third field.
645 const size_t nLineLength = nOffsetLen + 3;
646 // This is the line as it appears before tweaking according to the predictor.
647 std::vector<unsigned char> aOrigLine(nLineLength);
648 // This is the previous line.
649 std::vector<unsigned char> aPrevLine(nLineLength);
650 // This is the line as written to the stream.
651 std::vector<unsigned char> aFilteredLine(nLineLength);
652 for (const auto& rXRef : m_aXRef)
654 const XRefEntry& rEntry = rXRef.second;
656 if (!rEntry.GetDirty())
657 continue;
659 // Predictor.
660 size_t nPos = 0;
661 // PNG prediction: up (on all rows).
662 aOrigLine[nPos++] = 2;
664 // First field.
665 unsigned char nType = 0;
666 switch (rEntry.GetType())
668 case XRefEntryType::FREE:
669 nType = 0;
670 break;
671 case XRefEntryType::NOT_COMPRESSED:
672 nType = 1;
673 break;
674 case XRefEntryType::COMPRESSED:
675 nType = 2;
676 break;
678 aOrigLine[nPos++] = nType;
680 // Second field.
681 for (size_t i = 0; i < nOffsetLen; ++i)
683 size_t nByte = nOffsetLen - i - 1;
684 // Fields requiring more than one byte are stored with the
685 // high-order byte first.
686 unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
687 aOrigLine[nPos++] = nCh;
690 // Third field.
691 aOrigLine[nPos++] = 0;
693 // Now apply the predictor.
694 aFilteredLine[0] = aOrigLine[0];
695 for (size_t i = 1; i < nLineLength; ++i)
697 // Count the delta vs the previous line.
698 aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
699 // Remember the new reference.
700 aPrevLine[i] = aOrigLine[i];
703 aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
706 m_aEditBuffer.WriteUInt32AsString(nXRefStreamId);
707 m_aEditBuffer.WriteCharPtr(
708 " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
710 // ID.
711 auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
712 if (pID)
714 const std::vector<PDFElement*>& rElements = pID->GetElements();
715 m_aEditBuffer.WriteCharPtr("/ID [ <");
716 for (size_t i = 0; i < rElements.size(); ++i)
718 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
719 if (!pIDString)
720 continue;
722 m_aEditBuffer.WriteOString(pIDString->GetValue());
723 if ((i + 1) < rElements.size())
724 m_aEditBuffer.WriteCharPtr("> <");
726 m_aEditBuffer.WriteCharPtr("> ] ");
729 // Index.
730 m_aEditBuffer.WriteCharPtr("/Index [ ");
731 for (const auto& rXRef : m_aXRef)
733 if (!rXRef.second.GetDirty())
734 continue;
736 m_aEditBuffer.WriteUInt32AsString(rXRef.first);
737 m_aEditBuffer.WriteCharPtr(" 1 ");
739 m_aEditBuffer.WriteCharPtr("] ");
741 // Info.
742 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
743 if (pInfo)
745 m_aEditBuffer.WriteCharPtr("/Info ");
746 m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
747 m_aEditBuffer.WriteCharPtr(" ");
748 m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
749 m_aEditBuffer.WriteCharPtr(" R ");
752 // Length.
753 m_aEditBuffer.WriteCharPtr("/Length ");
755 ZCodec aZCodec;
756 aZCodec.BeginCompression();
757 aXRefStream.Seek(0);
758 SvMemoryStream aStream;
759 aZCodec.Compress(aXRefStream, aStream);
760 aZCodec.EndCompression();
761 aXRefStream.Seek(0);
762 aXRefStream.SetStreamSize(0);
763 aStream.Seek(0);
764 aXRefStream.WriteStream(aStream);
766 m_aEditBuffer.WriteUInt32AsString(aXRefStream.GetSize());
768 if (!m_aStartXRefs.empty())
770 // Write location of the previous cross-reference section.
771 m_aEditBuffer.WriteCharPtr("/Prev ");
772 m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
775 // Root.
776 m_aEditBuffer.WriteCharPtr("/Root ");
777 m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
778 m_aEditBuffer.WriteCharPtr(" ");
779 m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
780 m_aEditBuffer.WriteCharPtr(" R ");
782 // Size.
783 m_aEditBuffer.WriteCharPtr("/Size ");
784 m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
786 m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
787 aXRefStream.Seek(0);
788 m_aEditBuffer.WriteStream(aXRefStream);
789 m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
791 else
793 // Write the xref table.
794 m_aEditBuffer.WriteCharPtr("xref\n");
795 for (const auto& rXRef : m_aXRef)
797 size_t nObject = rXRef.first;
798 size_t nOffset = rXRef.second.GetOffset();
799 if (!rXRef.second.GetDirty())
800 continue;
802 m_aEditBuffer.WriteUInt32AsString(nObject);
803 m_aEditBuffer.WriteCharPtr(" 1\n");
804 OStringBuffer aBuffer;
805 aBuffer.append(static_cast<sal_Int32>(nOffset));
806 while (aBuffer.getLength() < 10)
807 aBuffer.insert(0, "0");
808 if (nObject == 0)
809 aBuffer.append(" 65535 f \n");
810 else
811 aBuffer.append(" 00000 n \n");
812 m_aEditBuffer.WriteOString(aBuffer.toString());
815 // Write the trailer.
816 m_aEditBuffer.WriteCharPtr("trailer\n<</Size ");
817 m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
818 m_aEditBuffer.WriteCharPtr("/Root ");
819 m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
820 m_aEditBuffer.WriteCharPtr(" ");
821 m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
822 m_aEditBuffer.WriteCharPtr(" R\n");
823 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
824 if (pInfo)
826 m_aEditBuffer.WriteCharPtr("/Info ");
827 m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
828 m_aEditBuffer.WriteCharPtr(" ");
829 m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
830 m_aEditBuffer.WriteCharPtr(" R\n");
832 auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
833 if (pID)
835 const std::vector<PDFElement*>& rElements = pID->GetElements();
836 m_aEditBuffer.WriteCharPtr("/ID [ <");
837 for (size_t i = 0; i < rElements.size(); ++i)
839 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
840 if (!pIDString)
841 continue;
843 m_aEditBuffer.WriteOString(pIDString->GetValue());
844 if ((i + 1) < rElements.size())
845 m_aEditBuffer.WriteCharPtr(">\n<");
847 m_aEditBuffer.WriteCharPtr("> ]\n");
850 if (!m_aStartXRefs.empty())
852 // Write location of the previous cross-reference section.
853 m_aEditBuffer.WriteCharPtr("/Prev ");
854 m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
857 m_aEditBuffer.WriteCharPtr(">>\n");
861 bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate,
862 const OUString& rDescription, bool bAdES)
864 m_aEditBuffer.Seek(STREAM_SEEK_TO_END);
865 m_aEditBuffer.WriteCharPtr("\n");
867 sal_uInt64 nSignatureLastByteRangeOffset = 0;
868 sal_Int64 nSignatureContentOffset = 0;
869 sal_Int32 nSignatureId = WriteSignatureObject(
870 rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
872 tools::Rectangle aSignatureRectangle;
873 sal_Int32 nAppearanceId = WriteAppearanceObject(aSignatureRectangle);
875 std::vector<PDFObjectElement*> aPages = GetPages();
876 if (aPages.empty())
878 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
879 return false;
882 size_t nPage = 0;
883 if (m_nSignaturePage < aPages.size())
885 nPage = m_nSignaturePage;
887 if (!aPages[nPage])
889 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to find page #" << nPage);
890 return false;
893 PDFObjectElement& rPage = *aPages[nPage];
894 sal_Int32 nAnnotId = WriteAnnotObject(rPage, nSignatureId, nAppearanceId, aSignatureRectangle);
896 if (!WritePageObject(rPage, nAnnotId))
898 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
899 return false;
902 PDFReferenceElement* pRoot = nullptr;
903 if (!WriteCatalogObject(nAnnotId, pRoot))
905 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
906 return false;
909 sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
910 WriteXRef(nXRefOffset, pRoot);
912 // Write startxref.
913 m_aEditBuffer.WriteCharPtr("startxref\n");
914 m_aEditBuffer.WriteUInt32AsString(nXRefOffset);
915 m_aEditBuffer.WriteCharPtr("\n%%EOF\n");
917 // Finalize the signature, now that we know the total file size.
918 // Calculate the length of the last byte range.
919 sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
920 sal_Int64 nLastByteRangeLength
921 = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
922 // Write the length to the buffer.
923 m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
924 OString aByteRangeBuffer = OString::number(nLastByteRangeLength) + " ]";
925 m_aEditBuffer.WriteOString(aByteRangeBuffer);
927 // Create the PKCS#7 object.
928 css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
929 if (!aDerEncoded.hasElements())
931 SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
932 return false;
935 m_aEditBuffer.Seek(0);
936 sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
937 std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
938 m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
940 m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
941 sal_uInt64 nBufferSize2 = nLastByteRangeLength;
942 std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
943 m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
945 OStringBuffer aCMSHexBuffer;
946 svl::crypto::Signing aSigning(xCertificate);
947 aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
948 aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
949 if (!aSigning.Sign(aCMSHexBuffer))
951 SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
952 return false;
955 assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
957 m_aEditBuffer.Seek(nSignatureContentOffset);
958 m_aEditBuffer.WriteOString(aCMSHexBuffer.toString());
960 return true;
963 bool PDFDocument::Write(SvStream& rStream)
965 m_aEditBuffer.Seek(0);
966 rStream.WriteStream(m_aEditBuffer);
967 return rStream.good();
// Splits the bytes of rStream into PDF elements and appends them to rElements.
//
// The stream is consumed one character at a time; the character decides which
// element type is created, then that element's own Read() consumes the rest of
// the token. eMode selects where to stop early: EOF_TOKEN returns once the
// offset recorded for the last %%EOF equals the stream position, END_OF_OBJECT
// returns after the first "endobj"; otherwise reading continues until the
// stream reports eof. pObjectElement, when given, is used as the initial
// "current object" that arrays/streams/numbers get attached to.
//
// Returns false as soon as any element fails to read, or an unexpected keyword
// or character is found; returns true otherwise.
970 bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
971 std::vector<std::unique_ptr<PDFElement>>& rElements,
972 PDFObjectElement* pObjectElement)
974 // Last seen object token.
975 PDFObjectElement* pObject = pObjectElement;
// Last seen name token; used below to recognize a "/Type /ObjStm" pair.
976 PDFNameElement* pObjectKey = nullptr;
// Set to the current object once it was recognized as an object stream.
977 PDFObjectElement* pObjectStream = nullptr;
978 bool bInXRef = false;
979 // The next number will be an xref offset.
980 bool bInStartXRef = false;
981 // Dictionary depth, so we know when we're outside any dictionaries.
// NOTE: arrays increase / decrease this counter as well.
982 int nDepth = 0;
983 // Last seen array token that's outside any dictionaries.
984 PDFArrayElement* pArray = nullptr;
985 // If we're inside an obj/endobj pair.
986 bool bInObject = false;
988 while (true)
990 char ch;
991 rStream.ReadChar(ch);
992 if (rStream.eof())
993 break;
995 switch (ch)
// Comment: the element re-reads the '%' itself, hence the seek back by one.
997 case '%':
999 auto pComment = new PDFCommentElement(*this);
1000 rElements.push_back(std::unique_ptr<PDFElement>(pComment));
1001 rStream.SeekRel(-1);
1002 if (!rElements.back()->Read(rStream))
1004 SAL_WARN("vcl.filter",
1005 "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
1006 return false;
1008 if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
1009 && m_aEOFs.back() == rStream.Tell())
1011 // Found EOF and partial parsing requested, we're done.
1012 return true;
1014 break;
1016 case '<':
1018 // Dictionary or hex string.
// Peek at the next character, then rewind over both characters so that
// the element's Read() starts at the first '<'.
1019 rStream.ReadChar(ch);
1020 rStream.SeekRel(-2);
1021 if (ch == '<')
1023 rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
1024 ++nDepth;
1026 else
1027 rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
// NOTE: the warning below names PDFDictionaryElement even when the element
// that failed is a PDFHexStringElement.
1028 if (!rElements.back()->Read(rStream))
1030 SAL_WARN("vcl.filter",
1031 "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
1032 return false;
1034 break;
// End of a dictionary.
1036 case '>':
1038 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
1039 --nDepth;
1040 rStream.SeekRel(-1);
1041 if (!rElements.back()->Read(rStream))
1043 SAL_WARN("vcl.filter",
1044 "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
1045 return false;
1047 break;
// Start of an array.
1049 case '[':
1051 auto pArr = new PDFArrayElement(pObject);
1052 rElements.push_back(std::unique_ptr<PDFElement>(pArr));
1053 if (nDepth == 0)
1055 // The array is attached directly, inform the object.
1056 pArray = pArr;
1057 if (pObject)
1059 pObject->SetArray(pArray);
1060 pObject->SetArrayOffset(rStream.Tell());
1063 ++nDepth;
1064 rStream.SeekRel(-1);
1065 if (!rElements.back()->Read(rStream))
1067 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
1068 return false;
1070 break;
// End of an array; for a top-level array the length is computed from the
// offset stored when the array was opened.
1072 case ']':
1074 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
1075 --nDepth;
1076 rStream.SeekRel(-1);
1077 if (nDepth == 0)
1079 if (pObject)
1081 pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
1084 if (!rElements.back()->Read(rStream))
1086 SAL_WARN("vcl.filter",
1087 "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1088 return false;
1090 break;
// Name.
1092 case '/':
1094 auto pNameElement = new PDFNameElement();
1095 rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
1096 rStream.SeekRel(-1);
1097 if (!pNameElement->Read(rStream))
1099 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1100 return false;
// A "Type" name directly followed by an "ObjStm" name marks the current
// object as an object stream; any other name becomes the new last-seen key.
1103 if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
1104 && pNameElement->GetValue() == "ObjStm")
1105 pObjectStream = pObject;
1106 else
1107 pObjectKey = pNameElement;
1108 break;
// Literal string.
1110 case '(':
1112 rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1113 rStream.SeekRel(-1);
1114 if (!rElements.back()->Read(rStream))
1116 SAL_WARN("vcl.filter",
1117 "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1118 return false;
1120 break;
// Everything else: numbers, keywords, whitespace (or an error).
1122 default:
1124 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-' || ch == '+'
1125 || ch == '.')
1127 // Numbering object: an integer or a real.
1128 auto pNumberElement = new PDFNumberElement();
1129 rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1130 rStream.SeekRel(-1);
1131 if (!pNumberElement->Read(rStream))
1133 SAL_WARN("vcl.filter",
1134 "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1135 return false;
// The number right after "startxref" is an xref offset: remember it and, if
// an object was seen at exactly that offset, treat that object as the xref
// stream.
1137 if (bInStartXRef)
1139 bInStartXRef = false;
1140 m_aStartXRefs.push_back(pNumberElement->GetValue());
1142 auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1143 if (it != m_aOffsetObjects.end())
1144 m_pXRefStream = it->second;
1146 else if (bInObject && !nDepth && pObject)
1147 // Number element inside an object, but outside a
1148 // dictionary / array: remember it.
1149 pObject->SetNumberElement(pNumberElement);
1151 else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1153 // Possible keyword, like "obj".
1154 rStream.SeekRel(-1);
1155 OString aKeyword = ReadKeyword(rStream);
1157 bool bObj = aKeyword == "obj";
1158 if (bObj || aKeyword == "R")
// Both "N G obj" and "N G R" need the two preceding tokens to be numbers.
1160 size_t nElements = rElements.size();
1161 if (nElements < 2)
1163 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1164 "tokens before 'obj' or 'R' keyword");
1165 return false;
1168 auto pObjectNumber
1169 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1170 auto pGenerationNumber
1171 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1172 if (!pObjectNumber || !pGenerationNumber)
1174 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1175 "generation number before 'obj' or 'R' keyword");
1176 return false;
1179 if (bObj)
// New object: register it both by the offset of its object number and by
// its ID, and make it the current object.
1181 pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
1182 pGenerationNumber->GetValue());
1183 rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1184 m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1185 m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1186 bInObject = true;
1188 else
1190 auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
1191 *pGenerationNumber);
1192 rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1193 if (bInObject && nDepth > 0 && pObject)
1194 // Inform the object about a new in-dictionary reference.
1195 pObject->AddDictionaryReference(pReference);
1197 if (!rElements.back()->Read(rStream))
1199 SAL_WARN("vcl.filter",
1200 "PDFDocument::Tokenize: PDFElement::Read() failed");
1201 return false;
1204 else if (aKeyword == "stream")
1206 // Look up the length of the stream from the parent object's dictionary.
1207 size_t nLength = 0;
1208 for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1210 // Iterate in reverse order.
1211 size_t nIndex = rElements.size() - nElement - 1;
1212 PDFElement* pElement = rElements[nIndex].get();
1213 auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1214 if (!pObj)
1215 continue;
// Only the nearest preceding object is consulted: every path below either
// breaks out of the loop or returns.
1217 PDFElement* pLookup = pObj->Lookup("Length");
1218 auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1219 if (pReference)
1221 // Length is provided as a reference.
1222 nLength = pReference->LookupNumber(rStream);
1223 break;
1226 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1227 if (pNumber)
1229 // Length is provided directly.
1230 nLength = pNumber->GetValue();
1231 break;
1234 SAL_WARN(
1235 "vcl.filter",
1236 "PDFDocument::Tokenize: found no Length key for stream keyword");
1237 return false;
1240 PDFDocument::SkipLineBreaks(rStream);
1241 auto pStreamElement = new PDFStreamElement(nLength);
1242 if (pObject)
1243 pObject->SetStream(pStreamElement);
1244 rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1245 if (!rElements.back()->Read(rStream))
1247 SAL_WARN("vcl.filter",
1248 "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1249 return false;
1252 else if (aKeyword == "endstream")
1254 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1255 if (!rElements.back()->Read(rStream))
1257 SAL_WARN("vcl.filter",
1258 "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1259 return false;
1262 else if (aKeyword == "endobj")
1264 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1265 if (!rElements.back()->Read(rStream))
1267 SAL_WARN("vcl.filter",
1268 "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1269 return false;
1271 if (eMode == TokenizeMode::END_OF_OBJECT)
1273 // Found endobj and only object parsing was requested, we're done.
1274 return true;
1277 if (pObjectStream)
1279 // We're at the end of an object stream, parse the stored objects.
1280 pObjectStream->ParseStoredObjects();
1281 pObjectStream = nullptr;
1282 pObjectKey = nullptr;
1284 bInObject = false;
1286 else if (aKeyword == "true" || aKeyword == "false")
1287 rElements.push_back(std::unique_ptr<PDFElement>(
1288 new PDFBooleanElement(aKeyword.toBoolean())));
1289 else if (aKeyword == "null")
1290 rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1291 else if (aKeyword == "xref")
1292 // Allow 'f' and 'n' keywords.
1293 bInXRef = true;
// 'f' / 'n' after "xref" are accepted, but produce no element.
1294 else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1297 else if (aKeyword == "trailer")
1299 auto pTrailer = new PDFTrailerElement(*this);
1301 // Make it possible to find this trailer later by offset.
1302 pTrailer->Read(rStream);
1303 m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1305 // When reading till the first EOF token only, remember
1306 // just the first trailer token.
1307 if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
1308 m_pTrailer = pTrailer;
1309 rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1311 else if (aKeyword == "startxref")
1313 bInStartXRef = true;
1315 else
1317 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1318 << aKeyword << "' keyword at byte position "
1319 << rStream.Tell());
1320 return false;
1323 else
// Neither a number nor a keyword: only whitespace and null bytes are tolerated.
1325 auto uChar = static_cast<unsigned char>(ch);
1326 // Be more lenient and allow unexpected null char
1327 if (!rtl::isAsciiWhiteSpace(uChar) && uChar != 0)
1329 SAL_WARN("vcl.filter",
1330 "PDFDocument::Tokenize: unexpected character with code "
1331 << sal_Int32(ch) << " at byte position " << rStream.Tell());
1332 return false;
1334 SAL_WARN_IF(uChar == 0, "vcl.filter",
1335 "PDFDocument::Tokenize: unexpected null character at "
1336 << rStream.Tell() << " - ignoring");
1338 break;
// End of stream reached without errors.
1343 return true;
1346 void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
1348 m_aIDObjects[nID] = pObject;
1351 bool PDFDocument::Read(SvStream& rStream)
1353 // Check file magic.
1354 std::vector<sal_Int8> aHeader(5);
1355 rStream.Seek(0);
1356 rStream.ReadBytes(aHeader.data(), aHeader.size());
1357 if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1358 || aHeader[4] != '-')
1360 SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1361 return false;
1364 // Allow later editing of the contents in-memory.
1365 rStream.Seek(0);
1366 m_aEditBuffer.WriteStream(rStream);
1368 // Look up the offset of the xref table.
1369 size_t nStartXRef = FindStartXRef(rStream);
1370 SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1371 if (nStartXRef == 0)
1373 SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1374 return false;
1376 while (true)
1378 rStream.Seek(nStartXRef);
1379 OString aKeyword = ReadKeyword(rStream);
1380 if (aKeyword.isEmpty())
1381 ReadXRefStream(rStream);
1383 else
1385 if (aKeyword != "xref")
1387 SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1388 return false;
1390 ReadXRef(rStream);
1391 if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1393 SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1394 return false;
1398 PDFNumberElement* pPrev = nullptr;
1399 if (m_pTrailer)
1401 pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1403 // Remember the offset of this trailer in the correct order. It's
1404 // possible that newer trailers don't have a larger offset.
1405 m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
1407 else if (m_pXRefStream)
1408 pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1409 if (pPrev)
1410 nStartXRef = pPrev->GetValue();
1412 // Reset state, except the edit buffer.
1413 m_aElements.clear();
1414 m_aOffsetObjects.clear();
1415 m_aIDObjects.clear();
1416 m_aStartXRefs.clear();
1417 m_aEOFs.clear();
1418 m_pTrailer = nullptr;
1419 m_pXRefStream = nullptr;
1420 if (!pPrev)
1421 break;
1424 // Then we can tokenize the stream.
1425 rStream.Seek(0);
1426 return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1429 OString PDFDocument::ReadKeyword(SvStream& rStream)
1431 OStringBuffer aBuf;
1432 char ch;
1433 rStream.ReadChar(ch);
1434 if (rStream.eof())
1435 return OString();
1436 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1438 aBuf.append(ch);
1439 rStream.ReadChar(ch);
1440 if (rStream.eof())
1441 return aBuf.toString();
1443 rStream.SeekRel(-1);
1444 return aBuf.toString();
1447 size_t PDFDocument::FindStartXRef(SvStream& rStream)
1449 // Find the "startxref" token, somewhere near the end of the document.
1450 std::vector<char> aBuf(1024);
1451 rStream.Seek(STREAM_SEEK_TO_END);
1452 if (rStream.Tell() > aBuf.size())
1453 rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1454 else
1455 // The document is really short, then just read it from the start.
1456 rStream.Seek(0);
1457 size_t nBeforePeek = rStream.Tell();
1458 size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1459 rStream.Seek(nBeforePeek);
1460 if (nSize != aBuf.size())
1461 aBuf.resize(nSize);
1462 OString aPrefix("startxref");
1463 // Find the last startxref at the end of the document.
1464 auto itLastValid = aBuf.end();
1465 auto it = aBuf.begin();
1466 while (true)
1468 it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1469 if (it == aBuf.end())
1470 break;
1472 itLastValid = it;
1473 ++it;
1475 if (itLastValid == aBuf.end())
1477 SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1478 return 0;
1481 rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1482 if (rStream.eof())
1484 SAL_WARN("vcl.filter",
1485 "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1486 return 0;
1489 PDFDocument::SkipWhitespace(rStream);
1490 PDFNumberElement aNumber;
1491 if (!aNumber.Read(rStream))
1492 return 0;
1493 return aNumber.GetValue();
1496 void PDFDocument::ReadXRefStream(SvStream& rStream)
1498 // Look up the stream length in the object dictionary.
1499 if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1501 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1502 return;
1505 if (m_aElements.empty())
1507 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1508 return;
1511 PDFObjectElement* pObject = nullptr;
1512 for (const auto& pElement : m_aElements)
1514 if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1516 pObject = pObj;
1517 break;
1520 if (!pObject)
1522 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1523 return;
1526 // So that the Prev key can be looked up later.
1527 m_pXRefStream = pObject;
1529 PDFElement* pLookup = pObject->Lookup("Length");
1530 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1531 if (!pNumber)
1533 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1534 return;
1536 sal_uInt64 nLength = pNumber->GetValue();
1538 // Look up the stream offset.
1539 PDFStreamElement* pStream = nullptr;
1540 for (const auto& pElement : m_aElements)
1542 if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1544 pStream = pS;
1545 break;
1548 if (!pStream)
1550 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1551 return;
1554 // Read and decompress it.
1555 rStream.Seek(pStream->GetOffset());
1556 std::vector<char> aBuf(nLength);
1557 rStream.ReadBytes(aBuf.data(), aBuf.size());
1559 auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1560 if (!pFilter)
1562 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1563 return;
1566 if (pFilter->GetValue() != "FlateDecode")
1568 SAL_WARN("vcl.filter",
1569 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1570 return;
1573 int nColumns = 1;
1574 int nPredictor = 1;
1575 if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1577 const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1578 auto it = rItems.find("Columns");
1579 if (it != rItems.end())
1580 if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1581 nColumns = pColumns->GetValue();
1582 it = rItems.find("Predictor");
1583 if (it != rItems.end())
1584 if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1585 nPredictor = pPredictor->GetValue();
1588 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1589 SvMemoryStream aStream;
1590 ZCodec aZCodec;
1591 aZCodec.BeginCompression();
1592 aZCodec.Decompress(aSource, aStream);
1593 if (!aZCodec.EndCompression())
1595 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1596 return;
1599 // Look up the first and the last entry we need to read.
1600 auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
1601 std::vector<size_t> aFirstObjects;
1602 std::vector<size_t> aNumberOfObjects;
1603 if (!pIndex)
1605 auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1606 if (pSize)
1608 aFirstObjects.push_back(0);
1609 aNumberOfObjects.push_back(pSize->GetValue());
1611 else
1613 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1614 return;
1617 else
1619 const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1620 size_t nFirstObject = 0;
1621 for (size_t i = 0; i < rIndexElements.size(); ++i)
1623 if (i % 2 == 0)
1625 auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1626 if (!pFirstObject)
1628 SAL_WARN("vcl.filter",
1629 "PDFDocument::ReadXRefStream: Index has no first object");
1630 return;
1632 nFirstObject = pFirstObject->GetValue();
1633 continue;
1636 auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1637 if (!pNumberOfObjects)
1639 SAL_WARN("vcl.filter",
1640 "PDFDocument::ReadXRefStream: Index has no number of objects");
1641 return;
1643 aFirstObjects.push_back(nFirstObject);
1644 aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1648 // Look up the format of a single entry.
1649 const int nWSize = 3;
1650 auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1651 if (!pW || pW->GetElements().size() < nWSize)
1653 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1654 return;
1656 int aW[nWSize];
1657 // First character is the (kind of) repeated predictor.
1658 int nLineLength = 1;
1659 for (size_t i = 0; i < nWSize; ++i)
1661 auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1662 if (!pI)
1664 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1665 return;
1667 aW[i] = pI->GetValue();
1668 nLineLength += aW[i];
1671 if (nPredictor > 1 && nLineLength - 1 != nColumns)
1673 SAL_WARN("vcl.filter",
1674 "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1675 return;
1678 aStream.Seek(0);
1679 for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1681 size_t nFirstObject = aFirstObjects[nSubSection];
1682 size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1684 // This is the line as read from the stream.
1685 std::vector<unsigned char> aOrigLine(nLineLength);
1686 // This is the line as it appears after tweaking according to nPredictor.
1687 std::vector<unsigned char> aFilteredLine(nLineLength);
1688 for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1690 size_t nIndex = nFirstObject + nEntry;
1692 aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
1693 if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1695 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1696 "inconsistent with /DecodeParms/Predictor for object #"
1697 << nIndex);
1698 return;
1701 for (int i = 0; i < nLineLength; ++i)
1703 switch (nPredictor)
1705 case 1:
1706 // No prediction.
1707 break;
1708 case 12:
1709 // PNG prediction: up (on all rows).
1710 aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1711 break;
1712 default:
1713 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1714 << nPredictor);
1715 return;
1719 // First character is already handled above.
1720 int nPos = 1;
1721 size_t nType = 0;
1722 // Start of the current field in the stream data.
1723 int nOffset = nPos;
1724 for (; nPos < nOffset + aW[0]; ++nPos)
1726 unsigned char nCh = aFilteredLine[nPos];
1727 nType = (nType << 8) + nCh;
1730 // Start of the object in the file stream.
1731 size_t nStreamOffset = 0;
1732 nOffset = nPos;
1733 for (; nPos < nOffset + aW[1]; ++nPos)
1735 unsigned char nCh = aFilteredLine[nPos];
1736 nStreamOffset = (nStreamOffset << 8) + nCh;
1739 // Generation number of the object.
1740 size_t nGenerationNumber = 0;
1741 nOffset = nPos;
1742 for (; nPos < nOffset + aW[2]; ++nPos)
1744 unsigned char nCh = aFilteredLine[nPos];
1745 nGenerationNumber = (nGenerationNumber << 8) + nCh;
1748 // Ignore invalid nType.
1749 if (nType <= 2)
1751 if (m_aXRef.find(nIndex) == m_aXRef.end())
1753 XRefEntry aEntry;
1754 switch (nType)
1756 case 0:
1757 aEntry.SetType(XRefEntryType::FREE);
1758 break;
1759 case 1:
1760 aEntry.SetType(XRefEntryType::NOT_COMPRESSED);
1761 break;
1762 case 2:
1763 aEntry.SetType(XRefEntryType::COMPRESSED);
1764 break;
1766 aEntry.SetOffset(nStreamOffset);
1767 m_aXRef[nIndex] = aEntry;
1774 void PDFDocument::ReadXRef(SvStream& rStream)
1776 PDFDocument::SkipWhitespace(rStream);
1778 while (true)
1780 PDFNumberElement aFirstObject;
1781 if (!aFirstObject.Read(rStream))
1783 // Next token is not a number, it'll be the trailer.
1784 return;
1787 if (aFirstObject.GetValue() < 0)
1789 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1790 return;
1793 PDFDocument::SkipWhitespace(rStream);
1794 PDFNumberElement aNumberOfEntries;
1795 if (!aNumberOfEntries.Read(rStream))
1797 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1798 return;
1801 if (aNumberOfEntries.GetValue() < 0)
1803 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1804 return;
1807 size_t nSize = aNumberOfEntries.GetValue();
1808 for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1810 size_t nIndex = aFirstObject.GetValue() + nEntry;
1811 PDFDocument::SkipWhitespace(rStream);
1812 PDFNumberElement aOffset;
1813 if (!aOffset.Read(rStream))
1815 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1816 return;
1819 PDFDocument::SkipWhitespace(rStream);
1820 PDFNumberElement aGenerationNumber;
1821 if (!aGenerationNumber.Read(rStream))
1823 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1824 return;
1827 PDFDocument::SkipWhitespace(rStream);
1828 OString aKeyword = ReadKeyword(rStream);
1829 if (aKeyword != "f" && aKeyword != "n")
1831 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1832 return;
1834 // xrefs are read in reverse order, so never update an existing
1835 // offset with an older one.
1836 if (m_aXRef.find(nIndex) == m_aXRef.end())
1838 XRefEntry aEntry;
1839 aEntry.SetOffset(aOffset.GetValue());
1840 // Initially only the first entry is dirty.
1841 if (nIndex == 0)
1842 aEntry.SetDirty(true);
1843 m_aXRef[nIndex] = aEntry;
1845 PDFDocument::SkipWhitespace(rStream);
1850 void PDFDocument::SkipWhitespace(SvStream& rStream)
1852 char ch = 0;
1854 while (true)
1856 rStream.ReadChar(ch);
1857 if (rStream.eof())
1858 break;
1860 if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1862 rStream.SeekRel(-1);
1863 return;
1868 void PDFDocument::SkipLineBreaks(SvStream& rStream)
1870 char ch = 0;
1872 while (true)
1874 rStream.ReadChar(ch);
1875 if (rStream.eof())
1876 break;
1878 if (ch != '\n' && ch != '\r')
1880 rStream.SeekRel(-1);
1881 return;
1886 size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1888 auto it = m_aXRef.find(nIndex);
1889 if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1891 SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1892 << nIndex << ", but failed");
1893 return 0;
1896 return it->second.GetOffset();
1899 const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1901 return m_aElements;
1904 /// Visits the page tree recursively, looking for page objects.
1905 static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1907 auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids"));
1908 if (!pKids)
1910 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1911 return;
1914 pPages->setVisiting(true);
1916 for (const auto& pKid : pKids->GetElements())
1918 auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1919 if (!pReference)
1920 continue;
1922 PDFObjectElement* pKidObject = pReference->LookupObject();
1923 if (!pKidObject)
1924 continue;
1926 // detect if visiting reenters itself
1927 if (pKidObject->alreadyVisiting())
1929 SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1930 continue;
1933 auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
1934 if (pName && pName->GetValue() == "Pages")
1935 // Pages inside pages: recurse.
1936 visitPages(pKidObject, rRet);
1937 else
1938 // Found an actual page.
1939 rRet.push_back(pKidObject);
1942 pPages->setVisiting(false);
1945 PDFObjectElement* PDFDocument::GetCatalog()
1947 PDFReferenceElement* pRoot = nullptr;
1949 PDFTrailerElement* pTrailer = nullptr;
1950 if (!m_aTrailerOffsets.empty())
1952 // Get access to the latest trailer, and work with the keys of that
1953 // one.
1954 auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
1955 if (it != m_aOffsetTrailers.end())
1956 pTrailer = it->second;
1959 if (pTrailer)
1960 pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
1961 else if (m_pXRefStream)
1962 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
1964 if (!pRoot)
1966 SAL_WARN("vcl.filter", "PDFDocument::GetCatalog: trailer has no Root key");
1967 return nullptr;
1970 return pRoot->LookupObject();
1973 std::vector<PDFObjectElement*> PDFDocument::GetPages()
1975 std::vector<PDFObjectElement*> aRet;
1977 PDFObjectElement* pCatalog = GetCatalog();
1978 if (!pCatalog)
1980 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1981 return aRet;
1984 PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
1985 if (!pPages)
1987 SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
1988 << ") has no pages");
1989 return aRet;
1992 visitPages(pPages, aRet);
1994 return aRet;
1997 void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
1999 std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
2001 std::vector<PDFObjectElement*> aRet;
2003 std::vector<PDFObjectElement*> aPages = GetPages();
2005 for (const auto& pPage : aPages)
2007 if (!pPage)
2008 continue;
2010 PDFElement* pAnnotsElement = pPage->Lookup("Annots");
2011 auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
2012 if (!pAnnots)
2014 // Annots is not an array, see if it's a reference to an object
2015 // with a direct array.
2016 auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
2017 if (pAnnotsRef)
2019 if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
2021 pAnnots = pAnnotsObject->GetArray();
2026 if (!pAnnots)
2027 continue;
2029 for (const auto& pAnnot : pAnnots->GetElements())
2031 auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
2032 if (!pReference)
2033 continue;
2035 PDFObjectElement* pAnnotObject = pReference->LookupObject();
2036 if (!pAnnotObject)
2037 continue;
2039 auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
2040 if (!pFT || pFT->GetValue() != "Sig")
2041 continue;
2043 aRet.push_back(pAnnotObject);
2047 return aRet;
2050 std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
2052 return svl::crypto::DecodeHexString(pElement->GetValue());
2055 OUString PDFDocument::DecodeHexStringUTF16BE(PDFHexStringElement const& rElement)
2057 std::vector<unsigned char> const encoded(DecodeHexString(&rElement));
2058 // Text strings can be PDF-DocEncoding or UTF-16BE with mandatory BOM;
2059 // only the latter supported is here
2060 if (encoded.size() < 2 || encoded[0] != 0xFE || encoded[1] != 0xFF || (encoded.size() & 1) != 0)
2062 return OUString();
2064 OUStringBuffer buf(static_cast<unsigned int>(encoded.size() - 2));
2065 for (size_t i = 2; i < encoded.size(); i += 2)
2067 buf.append(sal_Unicode((static_cast<sal_uInt16>(encoded[i]) << 8) | encoded[i + 1]));
2069 return buf.makeStringAndClear();
2072 PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)
2073 : m_rDoc(rDoc)
2077 bool PDFCommentElement::Read(SvStream& rStream)
2079 // Read from (including) the % char till (excluding) the end of the line/stream.
2080 OStringBuffer aBuf;
2081 char ch;
2082 rStream.ReadChar(ch);
2083 while (true)
2085 if (ch == '\n' || ch == '\r' || rStream.eof())
2087 m_aComment = aBuf.makeStringAndClear();
2089 if (m_aComment.startsWith("%%EOF"))
2091 sal_uInt64 nPos = rStream.Tell();
2092 if (ch == '\r')
2094 rStream.ReadChar(ch);
2095 rStream.SeekRel(-1);
2096 // If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat
2097 // behavior.
2098 if (ch == '\n')
2100 nPos += 1;
2103 m_rDoc.PushBackEOF(nPos);
2106 SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
2107 return true;
2109 aBuf.append(ch);
2110 rStream.ReadChar(ch);
2113 return false;
// Default constructor: compiler-generated, members get whatever initial
// values the class declaration gives them.
2116 PDFNumberElement::PDFNumberElement() = default;
2118 bool PDFNumberElement::Read(SvStream& rStream)
2120 OStringBuffer aBuf;
2121 m_nOffset = rStream.Tell();
2122 char ch;
2123 rStream.ReadChar(ch);
2124 if (rStream.eof())
2126 return false;
2128 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+' && ch != '.')
2130 rStream.SeekRel(-1);
2131 return false;
2133 while (!rStream.eof())
2135 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+'
2136 && ch != '.')
2138 rStream.SeekRel(-1);
2139 m_nLength = rStream.Tell() - m_nOffset;
2140 m_fValue = aBuf.makeStringAndClear().toDouble();
2141 SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2142 return true;
2144 aBuf.append(ch);
2145 rStream.ReadChar(ch);
2148 return false;
2151 sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2153 sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2155 bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2157 bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2159 bool PDFHexStringElement::Read(SvStream& rStream)
2161 char ch;
2162 rStream.ReadChar(ch);
2163 if (ch != '<')
2165 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2166 return false;
2168 rStream.ReadChar(ch);
2170 OStringBuffer aBuf;
2171 while (!rStream.eof())
2173 if (ch == '>')
2175 m_aValue = aBuf.makeStringAndClear();
2176 SAL_INFO("vcl.filter",
2177 "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2178 return true;
2180 aBuf.append(ch);
2181 rStream.ReadChar(ch);
2184 return false;
2187 const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2189 bool PDFLiteralStringElement::Read(SvStream& rStream)
2191 char nPrevCh = 0;
2192 char ch = 0;
2193 rStream.ReadChar(ch);
2194 if (ch != '(')
2196 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2197 return false;
2199 nPrevCh = ch;
2200 rStream.ReadChar(ch);
2202 // Start with 1 nesting level as we read a '(' above already.
2203 int nDepth = 1;
2204 OStringBuffer aBuf;
2205 while (!rStream.eof())
2207 if (ch == '(' && nPrevCh != '\\')
2208 ++nDepth;
2210 if (ch == ')' && nPrevCh != '\\')
2211 --nDepth;
2213 if (nDepth == 0)
2215 // ')' of the outermost '(' is reached.
2216 m_aValue = aBuf.makeStringAndClear();
2217 SAL_INFO("vcl.filter",
2218 "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2219 return true;
2221 aBuf.append(ch);
2222 nPrevCh = ch;
2223 rStream.ReadChar(ch);
2226 return false;
2229 const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2231 PDFTrailerElement::PDFTrailerElement(PDFDocument& rDoc)
2232 : m_rDoc(rDoc)
2233 , m_pDictionaryElement(nullptr)
2237 bool PDFTrailerElement::Read(SvStream& rStream)
2239 m_nOffset = rStream.Tell();
2240 return true;
2243 PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2245 if (!m_pDictionaryElement)
2247 PDFObjectParser aParser(m_rDoc.GetElements());
2248 aParser.parse(this);
2250 if (!m_pDictionaryElement)
2251 return nullptr;
2252 return m_pDictionaryElement->LookupElement(rDictionaryKey);
2255 sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2257 double PDFNumberElement::GetValue() const { return m_fValue; }
2259 PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2260 : m_rDoc(rDoc)
2261 , m_fObjectValue(fObjectValue)
2262 , m_fGenerationValue(fGenerationValue)
2263 , m_pNumberElement(nullptr)
2264 , m_nDictionaryOffset(0)
2265 , m_nDictionaryLength(0)
2266 , m_pDictionaryElement(nullptr)
2267 , m_nArrayOffset(0)
2268 , m_nArrayLength(0)
2269 , m_pArrayElement(nullptr)
2270 , m_pStreamElement(nullptr)
2271 , m_bParsed(false)
2275 bool PDFObjectElement::Read(SvStream& /*rStream*/)
2277 SAL_INFO("vcl.filter",
2278 "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2279 return true;
2282 PDFDictionaryElement::PDFDictionaryElement() = default;
2284 PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2285 const OString& rKey)
2287 auto it = rDictionary.find(rKey);
2288 if (it == rDictionary.end())
2289 return nullptr;
2291 return it->second;
2294 PDFObjectElement* PDFDictionaryElement::LookupObject(const OString& rDictionaryKey)
2296 auto pKey = dynamic_cast<PDFReferenceElement*>(
2297 PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2298 if (!pKey)
2300 SAL_WARN("vcl.filter",
2301 "PDFDictionaryElement::LookupObject: no such key with reference value: "
2302 << rDictionaryKey);
2303 return nullptr;
2306 return pKey->LookupObject();
2309 PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
2311 return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2314 void PDFObjectElement::parseIfNecessary()
2316 if (!m_bParsed)
2318 if (!m_aElements.empty())
2320 // This is a stored object in an object stream.
2321 PDFObjectParser aParser(m_aElements);
2322 aParser.parse(this);
2324 else
2326 // Normal object: elements are stored as members of the document itself.
2327 PDFObjectParser aParser(m_rDoc.GetElements());
2328 aParser.parse(this);
2330 m_bParsed = true;
2334 PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2336 parseIfNecessary();
2337 if (!m_pDictionaryElement)
2338 return nullptr;
2339 return PDFDictionaryElement::Lookup(GetDictionaryItems(), rDictionaryKey);
2342 PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
2344 auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2345 if (!pKey)
2347 SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2348 << rDictionaryKey);
2349 return nullptr;
2352 return pKey->LookupObject();
2355 double PDFObjectElement::GetObjectValue() const { return m_fObjectValue; }
2357 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2359 m_nDictionaryOffset = nDictionaryOffset;
2362 sal_uInt64 PDFObjectElement::GetDictionaryOffset()
2364 parseIfNecessary();
2365 return m_nDictionaryOffset;
2368 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2370 sal_uInt64 PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset; }
2372 void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2374 m_aDictionaryKeyOffset[rKey] = nOffset;
2377 void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2379 m_aDictionaryKeyValueLength[rKey] = nLength;
2382 sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2384 auto it = m_aDictionaryKeyOffset.find(rKey);
2385 if (it == m_aDictionaryKeyOffset.end())
2386 return 0;
2388 return it->second;
2391 sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2393 auto it = m_aDictionaryKeyValueLength.find(rKey);
2394 if (it == m_aDictionaryKeyValueLength.end())
2395 return 0;
2397 return it->second;
2400 const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2402 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2404 m_nDictionaryLength = nDictionaryLength;
2407 sal_uInt64 PDFObjectElement::GetDictionaryLength()
2409 parseIfNecessary();
2410 return m_nDictionaryLength;
2413 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2415 sal_uInt64 PDFObjectElement::GetArrayLength() const { return m_nArrayLength; }
2417 PDFDictionaryElement* PDFObjectElement::GetDictionary()
2419 parseIfNecessary();
2420 return m_pDictionaryElement;
2423 void PDFObjectElement::SetDictionary(PDFDictionaryElement* pDictionaryElement)
2425 m_pDictionaryElement = pDictionaryElement;
2428 void PDFObjectElement::SetNumberElement(PDFNumberElement* pNumberElement)
2430 m_pNumberElement = pNumberElement;
2433 PDFNumberElement* PDFObjectElement::GetNumberElement() const { return m_pNumberElement; }
2435 const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2437 return m_aDictionaryReferences;
2440 void PDFObjectElement::AddDictionaryReference(PDFReferenceElement* pReference)
2442 m_aDictionaryReferences.push_back(pReference);
2445 const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2447 parseIfNecessary();
2448 return m_pDictionaryElement->GetItems();
2451 void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2453 void PDFObjectElement::SetStream(PDFStreamElement* pStreamElement)
2455 m_pStreamElement = pStreamElement;
2458 PDFStreamElement* PDFObjectElement::GetStream() const { return m_pStreamElement; }
2460 PDFArrayElement* PDFObjectElement::GetArray()
2462 parseIfNecessary();
2463 return m_pArrayElement;
2466 void PDFObjectElement::ParseStoredObjects()
2468 if (!m_pStreamElement)
2470 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2471 return;
2474 auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2475 if (!pType || pType->GetValue() != "ObjStm")
2477 if (!pType)
2478 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2479 else
2480 SAL_WARN("vcl.filter",
2481 "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2482 return;
2485 auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2486 if (!pFilter || pFilter->GetValue() != "FlateDecode")
2488 if (!pFilter)
2489 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2490 else
2491 SAL_WARN("vcl.filter",
2492 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2493 return;
2496 auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2497 if (!pFirst)
2499 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2500 return;
2503 auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2504 if (!pN)
2506 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2507 return;
2509 size_t nN = pN->GetValue();
2511 auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2512 if (!pLength)
2514 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2515 return;
2517 size_t nLength = pLength->GetValue();
2519 // Read and decompress it.
2520 SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2521 rEditBuffer.Seek(m_pStreamElement->GetOffset());
2522 std::vector<char> aBuf(nLength);
2523 rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2524 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2525 SvMemoryStream aStream;
2526 ZCodec aZCodec;
2527 aZCodec.BeginCompression();
2528 aZCodec.Decompress(aSource, aStream);
2529 if (!aZCodec.EndCompression())
2531 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2532 return;
2535 nLength = aStream.TellEnd();
2536 aStream.Seek(0);
2537 std::vector<size_t> aObjNums;
2538 std::vector<size_t> aOffsets;
2539 std::vector<size_t> aLengths;
2540 // First iterate over and find out the lengths.
2541 for (size_t nObject = 0; nObject < nN; ++nObject)
2543 PDFNumberElement aObjNum;
2544 if (!aObjNum.Read(aStream))
2546 SAL_WARN("vcl.filter",
2547 "PDFObjectElement::ParseStoredObjects: failed to read object number");
2548 return;
2550 aObjNums.push_back(aObjNum.GetValue());
2552 PDFDocument::SkipWhitespace(aStream);
2554 PDFNumberElement aByteOffset;
2555 if (!aByteOffset.Read(aStream))
2557 SAL_WARN("vcl.filter",
2558 "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2559 return;
2561 aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2563 if (aOffsets.size() > 1)
2564 aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2565 if (nObject + 1 == nN)
2566 aLengths.push_back(nLength - aOffsets.back());
2568 PDFDocument::SkipWhitespace(aStream);
2571 // Now create streams with the proper length and tokenize the data.
2572 for (size_t nObject = 0; nObject < nN; ++nObject)
2574 size_t nObjNum = aObjNums[nObject];
2575 size_t nOffset = aOffsets[nObject];
2576 size_t nLen = aLengths[nObject];
2578 aStream.Seek(nOffset);
2579 m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2580 PDFObjectElement* pStored = m_aStoredElements.back().get();
2582 aBuf.clear();
2583 aBuf.resize(nLen);
2584 aStream.ReadBytes(aBuf.data(), aBuf.size());
2585 SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2587 m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(),
2588 pStored);
2589 // This is how references know the object is stored inside this object stream.
2590 m_rDoc.SetIDObject(nObjNum, pStored);
2592 // Store the stream of the object in the object stream for later use.
2593 std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2594 aStoredStream.Seek(0);
2595 pStreamBuffer->WriteStream(aStoredStream);
2596 pStored->SetStreamBuffer(pStreamBuffer);
2600 std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2602 return m_aElements;
2605 SvMemoryStream* PDFObjectElement::GetStreamBuffer() const { return m_pStreamBuffer.get(); }
2607 void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2609 m_pStreamBuffer = std::move(pStreamBuffer);
2612 PDFDocument& PDFObjectElement::GetDocument() { return m_rDoc; }
2614 PDFReferenceElement::PDFReferenceElement(PDFDocument& rDoc, PDFNumberElement& rObject,
2615 PDFNumberElement const& rGeneration)
2616 : m_rDoc(rDoc)
2617 , m_fObjectValue(rObject.GetValue())
2618 , m_fGenerationValue(rGeneration.GetValue())
2619 , m_rObject(rObject)
2623 PDFNumberElement& PDFReferenceElement::GetObjectElement() const { return m_rObject; }
2625 bool PDFReferenceElement::Read(SvStream& rStream)
2627 SAL_INFO("vcl.filter",
2628 "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2629 m_nOffset = rStream.Tell();
2630 return true;
2633 sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2635 double PDFReferenceElement::LookupNumber(SvStream& rStream) const
2637 size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2638 if (nOffset == 0)
2640 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2641 << m_fObjectValue);
2642 return 0;
2645 sal_uInt64 nOrigPos = rStream.Tell();
2646 comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2648 rStream.Seek(nOffset);
2650 PDFDocument::SkipWhitespace(rStream);
2651 PDFNumberElement aNumber;
2652 bool bRet = aNumber.Read(rStream);
2653 if (!bRet || aNumber.GetValue() != m_fObjectValue)
2655 SAL_WARN("vcl.filter",
2656 "PDFReferenceElement::LookupNumber: offset points to not matching object");
2657 return 0;
2662 PDFDocument::SkipWhitespace(rStream);
2663 PDFNumberElement aNumber;
2664 bool bRet = aNumber.Read(rStream);
2665 if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2667 SAL_WARN("vcl.filter",
2668 "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2669 return 0;
2674 PDFDocument::SkipWhitespace(rStream);
2675 OString aKeyword = PDFDocument::ReadKeyword(rStream);
2676 if (aKeyword != "obj")
2678 SAL_WARN("vcl.filter",
2679 "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2680 return 0;
2684 PDFDocument::SkipWhitespace(rStream);
2685 PDFNumberElement aNumber;
2686 if (!aNumber.Read(rStream))
2688 SAL_WARN("vcl.filter",
2689 "PDFReferenceElement::LookupNumber: failed to read referenced number");
2690 return 0;
2693 return aNumber.GetValue();
2696 PDFObjectElement* PDFReferenceElement::LookupObject()
2698 return m_rDoc.LookupObject(m_fObjectValue);
2701 PDFObjectElement* PDFDocument::LookupObject(size_t nObjectNumber)
2703 auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2705 if (itIDObjects != m_aIDObjects.end())
2706 return itIDObjects->second;
2708 SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2709 return nullptr;
2712 SvMemoryStream& PDFDocument::GetEditBuffer() { return m_aEditBuffer; }
2714 int PDFReferenceElement::GetObjectValue() const { return m_fObjectValue; }
2716 int PDFReferenceElement::GetGenerationValue() const { return m_fGenerationValue; }
2718 bool PDFDictionaryElement::Read(SvStream& rStream)
2720 char ch;
2721 rStream.ReadChar(ch);
2722 if (ch != '<')
2724 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2725 return false;
2728 if (rStream.eof())
2730 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2731 return false;
2734 rStream.ReadChar(ch);
2735 if (ch != '<')
2737 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2738 return false;
2741 m_nLocation = rStream.Tell();
2743 SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2745 return true;
2748 PDFEndDictionaryElement::PDFEndDictionaryElement() = default;
2750 sal_uInt64 PDFEndDictionaryElement::GetLocation() const { return m_nLocation; }
2752 bool PDFEndDictionaryElement::Read(SvStream& rStream)
2754 m_nLocation = rStream.Tell();
2755 char ch;
2756 rStream.ReadChar(ch);
2757 if (ch != '>')
2759 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2760 return false;
2763 if (rStream.eof())
2765 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2766 return false;
2769 rStream.ReadChar(ch);
2770 if (ch != '>')
2772 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2773 return false;
2776 SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2778 return true;
2781 PDFNameElement::PDFNameElement() = default;
2783 bool PDFNameElement::Read(SvStream& rStream)
2785 char ch;
2786 rStream.ReadChar(ch);
2787 if (ch != '/')
2789 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2790 return false;
2792 m_nLocation = rStream.Tell();
2794 if (rStream.eof())
2796 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2797 return false;
2800 // Read till the first white-space.
2801 OStringBuffer aBuf;
2802 rStream.ReadChar(ch);
2803 while (!rStream.eof())
2805 if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
2806 || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2808 rStream.SeekRel(-1);
2809 m_aValue = aBuf.makeStringAndClear();
2810 SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2811 return true;
2813 aBuf.append(ch);
2814 rStream.ReadChar(ch);
2817 return false;
2820 const OString& PDFNameElement::GetValue() const { return m_aValue; }
2822 sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
2824 PDFStreamElement::PDFStreamElement(size_t nLength)
2825 : m_nLength(nLength)
2826 , m_nOffset(0)
2830 bool PDFStreamElement::Read(SvStream& rStream)
2832 SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
2833 m_nOffset = rStream.Tell();
2834 std::vector<unsigned char> aBytes(m_nLength);
2835 rStream.ReadBytes(aBytes.data(), aBytes.size());
2836 m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
2838 return rStream.good();
2841 SvMemoryStream& PDFStreamElement::GetMemory() { return m_aMemory; }
2843 sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
2845 bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
2847 bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
2849 PDFArrayElement::PDFArrayElement(PDFObjectElement* pObject)
2850 : m_pObject(pObject)
2854 bool PDFArrayElement::Read(SvStream& rStream)
2856 char ch;
2857 rStream.ReadChar(ch);
2858 if (ch != '[')
2860 SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
2861 return false;
2864 SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2866 return true;
2869 void PDFArrayElement::PushBack(PDFElement* pElement)
2871 if (m_pObject)
2872 SAL_INFO("vcl.filter",
2873 "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
2874 m_aElements.push_back(pElement);
2877 const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
2879 PDFEndArrayElement::PDFEndArrayElement() = default;
2881 bool PDFEndArrayElement::Read(SvStream& rStream)
2883 m_nOffset = rStream.Tell();
2884 char ch;
2885 rStream.ReadChar(ch);
2886 if (ch != ']')
2888 SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
2889 return false;
2892 SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
2894 return true;
2897 sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
2899 // PDFObjectParser
2901 size_t PDFObjectParser::parse(PDFElement* pParsingElement, size_t nStartIndex, int nCurrentDepth)
2903 // The index of last parsed element
2904 size_t nReturnIndex = 0;
2906 pParsingElement->setParsing(true);
2908 comphelper::ScopeGuard aGuard([pParsingElement]() { pParsingElement->setParsing(false); });
2910 // Current object, if root is an object, else nullptr
2911 auto pParsingObject = dynamic_cast<PDFObjectElement*>(pParsingElement);
2912 auto pParsingTrailer = dynamic_cast<PDFTrailerElement*>(pParsingElement);
2914 // Current dictionary, if root is an dictionary, else nullptr
2915 auto pParsingDictionary = dynamic_cast<PDFDictionaryElement*>(pParsingElement);
2917 // Current parsing array, if root is an array, else nullptr
2918 auto pParsingArray = dynamic_cast<PDFArrayElement*>(pParsingElement);
2920 // Find out where the dictionary for this object starts.
2921 size_t nIndex = nStartIndex;
2922 for (size_t i = nStartIndex; i < mrElements.size(); ++i)
2924 if (mrElements[i].get() == pParsingElement)
2926 nIndex = i;
2927 break;
2931 OString aName;
2932 sal_uInt64 nNameOffset = 0;
2933 std::vector<PDFNumberElement*> aNumbers;
2935 sal_uInt64 nDictionaryOffset = 0;
2937 // Current depth; 1 is current
2938 int nDepth = 0;
2940 for (size_t i = nIndex; i < mrElements.size(); ++i)
2942 auto* pCurrentElement = mrElements[i].get();
2944 // Dictionary tokens can be nested, track enter/leave.
2945 if (auto pCurrentDictionary = dynamic_cast<PDFDictionaryElement*>(pCurrentElement))
2947 // Handle previously stored number
2948 if (!aNumbers.empty())
2950 if (pParsingDictionary)
2952 PDFNumberElement* pNumber = aNumbers.back();
2953 sal_uInt64 nLength
2954 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
2956 pParsingDictionary->insert(aName, pNumber);
2957 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
2958 pParsingDictionary->SetKeyValueLength(aName, nLength);
2960 else if (pParsingArray)
2962 for (auto& pNumber : aNumbers)
2963 pParsingArray->PushBack(pNumber);
2965 else
2967 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
2969 aName.clear();
2970 aNumbers.clear();
2973 nDepth++;
2975 if (nDepth == 1) // pParsingDictionary is the current one
2977 // First dictionary start, track start offset.
2978 nDictionaryOffset = pCurrentDictionary->GetLocation();
2980 if (pParsingObject)
2982 // Then the toplevel dictionary of the object.
2983 pParsingObject->SetDictionary(pCurrentDictionary);
2984 pParsingObject->SetDictionaryOffset(nDictionaryOffset);
2985 pParsingDictionary = pCurrentDictionary;
2987 else if (pParsingTrailer)
2989 pParsingTrailer->SetDictionary(pCurrentDictionary);
2990 pParsingDictionary = pCurrentDictionary;
2993 else if (!pCurrentDictionary->alreadyParsing())
2995 if (pParsingArray)
2997 pParsingArray->PushBack(pCurrentDictionary);
2999 else if (pParsingDictionary)
3001 // Dictionary toplevel value.
3002 pParsingDictionary->insert(aName, pCurrentDictionary);
3004 else
3006 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3008 // Nested dictionary.
3009 const size_t nNextElementIndex = parse(pCurrentDictionary, i, nCurrentDepth + 1);
3010 i = std::max(i, nNextElementIndex - 1);
3013 else if (auto pCurrentEndDictionary
3014 = dynamic_cast<PDFEndDictionaryElement*>(pCurrentElement))
3016 // Handle previously stored number
3017 if (!aNumbers.empty())
3019 if (pParsingDictionary)
3021 PDFNumberElement* pNumber = aNumbers.back();
3022 sal_uInt64 nLength
3023 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3025 pParsingDictionary->insert(aName, pNumber);
3026 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3027 pParsingDictionary->SetKeyValueLength(aName, nLength);
3029 else if (pParsingArray)
3031 for (auto& pNumber : aNumbers)
3032 pParsingArray->PushBack(pNumber);
3034 else
3036 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3038 aName.clear();
3039 aNumbers.clear();
3042 if (pParsingDictionary)
3044 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3045 sal_uInt64 nLength = pCurrentEndDictionary->GetLocation() - nNameOffset + 2;
3046 pParsingDictionary->SetKeyValueLength(aName, nLength);
3047 aName.clear();
3050 if (nDepth == 1) // did the parsing ended
3052 // Last dictionary end, track length and stop parsing.
3053 if (pParsingObject)
3055 sal_uInt64 nDictionaryLength
3056 = pCurrentEndDictionary->GetLocation() - nDictionaryOffset;
3057 pParsingObject->SetDictionaryLength(nDictionaryLength);
3059 nReturnIndex = i;
3060 break;
3063 nDepth--;
3065 else if (auto pCurrentArray = dynamic_cast<PDFArrayElement*>(pCurrentElement))
3067 // Handle previously stored number
3068 if (!aNumbers.empty())
3070 if (pParsingDictionary)
3072 PDFNumberElement* pNumber = aNumbers.back();
3074 sal_uInt64 nLength
3075 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3076 pParsingDictionary->insert(aName, pNumber);
3077 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3078 pParsingDictionary->SetKeyValueLength(aName, nLength);
3080 else if (pParsingArray)
3082 for (auto& pNumber : aNumbers)
3083 pParsingArray->PushBack(pNumber);
3085 else
3087 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3089 aName.clear();
3090 aNumbers.clear();
3093 nDepth++;
3094 if (nDepth == 1) // pParsingDictionary is the current one
3096 if (pParsingObject)
3098 pParsingObject->SetArray(pCurrentArray);
3099 pParsingArray = pCurrentArray;
3102 else if (!pCurrentArray->alreadyParsing())
3104 if (pParsingArray)
3106 // Array is toplevel
3107 pParsingArray->PushBack(pCurrentArray);
3109 else if (pParsingDictionary)
3111 // Dictionary toplevel value.
3112 pParsingDictionary->insert(aName, pCurrentArray);
3115 const size_t nNextElementIndex = parse(pCurrentArray, i, nCurrentDepth + 1);
3117 // ensure we go forwards and not endlessly loop
3118 i = std::max(i, nNextElementIndex - 1);
3121 else if (auto pCurrentEndArray = dynamic_cast<PDFEndArrayElement*>(pCurrentElement))
3123 // Handle previously stored number
3124 if (!aNumbers.empty())
3126 if (pParsingDictionary)
3128 PDFNumberElement* pNumber = aNumbers.back();
3130 sal_uInt64 nLength
3131 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3132 pParsingDictionary->insert(aName, pNumber);
3133 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3134 pParsingDictionary->SetKeyValueLength(aName, nLength);
3136 else if (pParsingArray)
3138 for (auto& pNumber : aNumbers)
3139 pParsingArray->PushBack(pNumber);
3141 else
3143 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3145 aName.clear();
3146 aNumbers.clear();
3149 if (nDepth == 1) // did the pParsing ended
3151 // Last array end, track length and stop parsing.
3152 nReturnIndex = i;
3153 break;
3155 else
3157 if (pParsingDictionary)
3159 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3160 // Include the ending ']' in the length of the key - (array)value pair length.
3161 sal_uInt64 nLength = pCurrentEndArray->GetOffset() - nNameOffset + 1;
3162 pParsingDictionary->SetKeyValueLength(aName, nLength);
3163 aName.clear();
3166 nDepth--;
3168 else if (auto pCurrentName = dynamic_cast<PDFNameElement*>(pCurrentElement))
3170 // Handle previously stored number
3171 if (!aNumbers.empty())
3173 if (pParsingDictionary)
3175 PDFNumberElement* pNumber = aNumbers.back();
3177 sal_uInt64 nLength
3178 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3179 pParsingDictionary->insert(aName, pNumber);
3180 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3181 pParsingDictionary->SetKeyValueLength(aName, nLength);
3183 else if (pParsingArray)
3185 for (auto& pNumber : aNumbers)
3186 pParsingArray->PushBack(pNumber);
3188 aName.clear();
3189 aNumbers.clear();
3192 // Now handle name
3193 if (pParsingArray)
3195 // if we are in an array, just push the name to array
3196 pParsingArray->PushBack(pCurrentName);
3198 else if (pParsingDictionary)
3200 // if we are in a dictionary, we need to store the name as a possible key
3201 if (aName.isEmpty())
3203 aName = pCurrentName->GetValue();
3204 nNameOffset = pCurrentName->GetLocation();
3206 else
3208 sal_uInt64 nKeyLength
3209 = pCurrentName->GetLocation() + pCurrentName->GetLength() - nNameOffset;
3210 pParsingDictionary->insert(aName, pCurrentName);
3211 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3212 pParsingDictionary->SetKeyValueLength(aName, nKeyLength);
3213 aName.clear();
3217 else if (auto pReference = dynamic_cast<PDFReferenceElement*>(pCurrentElement))
3219 if (pParsingArray)
3221 pParsingArray->PushBack(pReference);
3223 else if (pParsingDictionary)
3225 sal_uInt64 nLength = pReference->GetOffset() - nNameOffset;
3226 pParsingDictionary->insert(aName, pReference);
3227 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3228 pParsingDictionary->SetKeyValueLength(aName, nLength);
3229 aName.clear();
3231 else
3233 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3235 aNumbers.clear();
3237 else if (auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(pCurrentElement))
3239 if (pParsingArray)
3241 pParsingArray->PushBack(pLiteralString);
3243 else if (pParsingDictionary)
3245 pParsingDictionary->insert(aName, pLiteralString);
3246 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3247 aName.clear();
3249 else
3251 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3254 else if (auto pBoolean = dynamic_cast<PDFBooleanElement*>(pCurrentElement))
3256 if (pParsingArray)
3258 pParsingArray->PushBack(pBoolean);
3260 else if (pParsingDictionary)
3262 pParsingDictionary->insert(aName, pBoolean);
3263 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3264 aName.clear();
3266 else
3268 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3271 else if (auto pHexString = dynamic_cast<PDFHexStringElement*>(pCurrentElement))
3273 if (pParsingArray)
3275 pParsingArray->PushBack(pHexString);
3277 else if (pParsingDictionary)
3279 pParsingDictionary->insert(aName, pHexString);
3280 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3281 aName.clear();
3284 else if (auto pNumberElement = dynamic_cast<PDFNumberElement*>(pCurrentElement))
3286 // Just remember this, so that in case it's not a reference parameter,
3287 // we can handle it later.
3288 aNumbers.push_back(pNumberElement);
3290 else if (dynamic_cast<PDFEndObjectElement*>(pCurrentElement))
3292 // parsing of the object is finished
3293 break;
3295 else if (dynamic_cast<PDFObjectElement*>(pCurrentElement)
3296 || dynamic_cast<PDFTrailerElement*>(pCurrentElement))
3298 continue;
3300 else
3302 SAL_INFO("vcl.filter", "Unhandled element while parsing.");
3306 return nReturnIndex;
} // namespace vcl::filter
3311 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */