bump product version to 6.4.0.3
[LibreOffice.git] / vcl / source / filter / ipdf / pdfdocument.cxx
blob02bde3fcaedb493f54dad89882aaf732348f79b8
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <vcl/filter/pdfdocument.hxx>
12 #include <map>
13 #include <memory>
14 #include <vector>
16 #include <com/sun/star/uno/Sequence.hxx>
17 #include <com/sun/star/security/XCertificate.hpp>
19 #include <comphelper/scopeguard.hxx>
20 #include <comphelper/string.hxx>
21 #include <rtl/character.hxx>
22 #include <rtl/strbuf.hxx>
23 #include <rtl/string.hxx>
24 #include <sal/log.hxx>
25 #include <sal/types.h>
26 #include <svl/cryptosign.hxx>
27 #include <tools/zcodec.hxx>
28 #include <vcl/pdfwriter.hxx>
30 using namespace com::sun::star;
32 namespace vcl
34 namespace filter
/// Number of characters reserved for the hex-encoded signature inside the
/// /Contents <...> placeholder of a newly written signature object.
constexpr int MAX_SIGNATURE_CONTENT_LENGTH = 50000;

class PDFTrailerElement;
40 /// A one-liner comment.
41 class PDFCommentElement : public PDFElement
43 PDFDocument& m_rDoc;
44 OString m_aComment;
46 public:
47 explicit PDFCommentElement(PDFDocument& rDoc);
48 bool Read(SvStream& rStream) override;
51 class PDFReferenceElement;
53 /// End of a dictionary: '>>'.
54 class PDFEndDictionaryElement : public PDFElement
56 /// Offset before the '>>' token.
57 sal_uInt64 m_nLocation = 0;
59 public:
60 PDFEndDictionaryElement();
61 bool Read(SvStream& rStream) override;
62 sal_uInt64 GetLocation() const;
65 /// End of a stream: 'endstream' keyword.
66 class PDFEndStreamElement : public PDFElement
68 public:
69 bool Read(SvStream& rStream) override;
72 /// End of an object: 'endobj' keyword.
73 class PDFEndObjectElement : public PDFElement
75 public:
76 bool Read(SvStream& rStream) override;
79 /// End of an array: ']'.
80 class PDFEndArrayElement : public PDFElement
82 /// Location before the ']' token.
83 sal_uInt64 m_nOffset = 0;
85 public:
86 PDFEndArrayElement();
87 bool Read(SvStream& rStream) override;
88 sal_uInt64 GetOffset() const;
91 /// Boolean object: a 'true' or a 'false'.
92 class PDFBooleanElement : public PDFElement
94 public:
95 explicit PDFBooleanElement(bool bValue);
96 bool Read(SvStream& rStream) override;
99 /// Null object: the 'null' singleton.
100 class PDFNullElement : public PDFElement
102 public:
103 bool Read(SvStream& rStream) override;
106 /// The trailer singleton is at the end of the doc.
107 class PDFTrailerElement : public PDFElement
109 PDFDocument& m_rDoc;
110 std::map<OString, PDFElement*> m_aDictionary;
111 /// Location of the end of the trailer token.
112 sal_uInt64 m_nOffset = 0;
114 public:
115 explicit PDFTrailerElement(PDFDocument& rDoc);
116 bool Read(SvStream& rStream) override;
117 PDFElement* Lookup(const OString& rDictionaryKey);
118 sal_uInt64 GetLocation() const;
121 XRefEntry::XRefEntry() = default;
123 PDFDocument::PDFDocument() = default;
125 bool PDFDocument::RemoveSignature(size_t nPosition)
127 std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
128 if (nPosition >= aSignatures.size())
130 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
131 return false;
134 if (aSignatures.size() != m_aEOFs.size() - 1)
136 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
137 "and incremental updates");
138 return false;
141 // The EOF offset is the end of the original file, without the signature at
142 // nPosition.
143 m_aEditBuffer.Seek(m_aEOFs[nPosition]);
144 // Drop all bytes after the current position.
145 m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1);
147 return m_aEditBuffer.good();
150 sal_uInt32 PDFDocument::GetNextSignature()
152 sal_uInt32 nRet = 0;
153 for (const auto& pSignature : GetSignatureWidgets())
155 auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
156 if (!pT)
157 continue;
159 const OString& rValue = pT->GetValue();
160 const OString aPrefix = "Signature";
161 if (!rValue.startsWith(aPrefix))
162 continue;
164 nRet = std::max(nRet, rValue.copy(aPrefix.getLength()).toUInt32());
167 return nRet + 1;
170 sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES,
171 sal_uInt64& rLastByteRangeOffset,
172 sal_Int64& rContentOffset)
174 // Write signature object.
175 sal_Int32 nSignatureId = m_aXRef.size();
176 XRefEntry aSignatureEntry;
177 aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
178 aSignatureEntry.SetDirty(true);
179 m_aXRef[nSignatureId] = aSignatureEntry;
180 OStringBuffer aSigBuffer;
181 aSigBuffer.append(nSignatureId);
182 aSigBuffer.append(" 0 obj\n");
183 aSigBuffer.append("<</Contents <");
184 rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
185 // Reserve space for the PKCS#7 object.
186 OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
187 comphelper::string::padToLength(aContentFiller, MAX_SIGNATURE_CONTENT_LENGTH, '0');
188 aSigBuffer.append(aContentFiller.makeStringAndClear());
189 aSigBuffer.append(">\n/Type/Sig/SubFilter");
190 if (bAdES)
191 aSigBuffer.append("/ETSI.CAdES.detached");
192 else
193 aSigBuffer.append("/adbe.pkcs7.detached");
195 // Time of signing.
196 aSigBuffer.append(" /M (");
197 aSigBuffer.append(vcl::PDFWriter::GetDateTime());
198 aSigBuffer.append(")");
200 // Byte range: we can write offset1-length1 and offset2 right now, will
201 // write length2 later.
202 aSigBuffer.append(" /ByteRange [ 0 ");
203 // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
204 aSigBuffer.append(rContentOffset - 1);
205 aSigBuffer.append(" ");
206 aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
207 aSigBuffer.append(" ");
208 rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
209 // We don't know how many bytes we need for the last ByteRange value, this
210 // should be enough.
211 OStringBuffer aByteRangeFiller;
212 comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
213 aSigBuffer.append(aByteRangeFiller.makeStringAndClear());
214 // Finish the Sig obj.
215 aSigBuffer.append(" /Filter/Adobe.PPKMS");
217 if (!rDescription.isEmpty())
219 aSigBuffer.append("/Reason<");
220 vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
221 aSigBuffer.append(">");
224 aSigBuffer.append(" >>\nendobj\n\n");
225 m_aEditBuffer.WriteOString(aSigBuffer.toString());
227 return nSignatureId;
230 sal_Int32 PDFDocument::WriteAppearanceObject()
232 // Write appearance object.
233 sal_Int32 nAppearanceId = m_aXRef.size();
234 XRefEntry aAppearanceEntry;
235 aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
236 aAppearanceEntry.SetDirty(true);
237 m_aXRef[nAppearanceId] = aAppearanceEntry;
238 m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
239 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
240 m_aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
241 m_aEditBuffer.WriteCharPtr("/BBox[0 0 0 0]\n/Length 0\n>>\n");
242 m_aEditBuffer.WriteCharPtr("stream\n\nendstream\nendobj\n\n");
244 return nAppearanceId;
247 sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
248 sal_Int32 nAppearanceId)
250 // Decide what identifier to use for the new signature.
251 sal_uInt32 nNextSignature = GetNextSignature();
253 // Write the Annot object, references nSignatureId and nAppearanceId.
254 sal_Int32 nAnnotId = m_aXRef.size();
255 XRefEntry aAnnotEntry;
256 aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
257 aAnnotEntry.SetDirty(true);
258 m_aXRef[nAnnotId] = aAnnotEntry;
259 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
260 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
261 m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
262 m_aEditBuffer.WriteCharPtr("/Rect[0 0 0 0]\n");
263 m_aEditBuffer.WriteCharPtr("/FT/Sig\n");
264 m_aEditBuffer.WriteCharPtr("/P ");
265 m_aEditBuffer.WriteUInt32AsString(rFirstPage.GetObjectValue());
266 m_aEditBuffer.WriteCharPtr(" 0 R\n");
267 m_aEditBuffer.WriteCharPtr("/T(Signature");
268 m_aEditBuffer.WriteUInt32AsString(nNextSignature);
269 m_aEditBuffer.WriteCharPtr(")\n");
270 m_aEditBuffer.WriteCharPtr("/V ");
271 m_aEditBuffer.WriteUInt32AsString(nSignatureId);
272 m_aEditBuffer.WriteCharPtr(" 0 R\n");
273 m_aEditBuffer.WriteCharPtr("/DV ");
274 m_aEditBuffer.WriteUInt32AsString(nSignatureId);
275 m_aEditBuffer.WriteCharPtr(" 0 R\n");
276 m_aEditBuffer.WriteCharPtr("/AP<<\n/N ");
277 m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
278 m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n");
279 m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
281 return nAnnotId;
284 bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
286 PDFElement* pAnnots = rFirstPage.Lookup("Annots");
287 auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
288 if (pAnnotsReference)
290 // Write the updated Annots key of the Page object.
291 PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
292 if (!pAnnotsObject)
294 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
295 return false;
298 sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
299 m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
300 m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
301 m_aXRef[nAnnotsId].SetDirty(true);
302 m_aEditBuffer.WriteUInt32AsString(nAnnotsId);
303 m_aEditBuffer.WriteCharPtr(" 0 obj\n[");
305 // Write existing references.
306 PDFArrayElement* pArray = pAnnotsObject->GetArray();
307 if (!pArray)
309 SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
310 return false;
313 for (size_t i = 0; i < pArray->GetElements().size(); ++i)
315 auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
316 if (!pReference)
317 continue;
319 if (i)
320 m_aEditBuffer.WriteCharPtr(" ");
321 m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue());
322 m_aEditBuffer.WriteCharPtr(" 0 R");
324 // Write our reference.
325 m_aEditBuffer.WriteCharPtr(" ");
326 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
327 m_aEditBuffer.WriteCharPtr(" 0 R");
329 m_aEditBuffer.WriteCharPtr("]\nendobj\n\n");
331 else
333 // Write the updated first page object, references nAnnotId.
334 sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
335 if (nFirstPageId >= m_aXRef.size())
337 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
338 return false;
340 m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
341 m_aXRef[nFirstPageId].SetDirty(true);
342 m_aEditBuffer.WriteUInt32AsString(nFirstPageId);
343 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
344 m_aEditBuffer.WriteCharPtr("<<");
345 auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
346 if (!pAnnotsArray)
348 // No Annots key, just write the key with a single reference.
349 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
350 + rFirstPage.GetDictionaryOffset(),
351 rFirstPage.GetDictionaryLength());
352 m_aEditBuffer.WriteCharPtr("/Annots[");
353 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
354 m_aEditBuffer.WriteCharPtr(" 0 R]");
356 else
358 // Annots key is already there, insert our reference at the end.
359 PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
361 // Offset right before the end of the Annots array.
362 sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots")
363 + pDictionary->GetKeyValueLength("Annots") - 1;
364 // Length of beginning of the dictionary -> Annots end.
365 sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
366 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
367 + rFirstPage.GetDictionaryOffset(),
368 nAnnotsBeforeEndLength);
369 m_aEditBuffer.WriteCharPtr(" ");
370 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
371 m_aEditBuffer.WriteCharPtr(" 0 R");
372 // Length of Annots end -> end of the dictionary.
373 sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
374 + rFirstPage.GetDictionaryLength()
375 - nAnnotsEndOffset;
376 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
377 + nAnnotsEndOffset,
378 nAnnotsAfterEndLength);
380 m_aEditBuffer.WriteCharPtr(">>");
381 m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
384 return true;
387 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot)
389 if (m_pXRefStream)
390 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
391 else
393 if (!m_pTrailer)
395 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
396 return false;
398 pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
400 if (!pRoot)
402 SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
403 return false;
405 PDFObjectElement* pCatalog = pRoot->LookupObject();
406 if (!pCatalog)
408 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
409 return false;
411 sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
412 if (nCatalogId >= m_aXRef.size())
414 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
415 return false;
417 PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
418 auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
419 if (pAcroFormReference)
421 // Write the updated AcroForm key of the Catalog object.
422 PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
423 if (!pAcroFormObject)
425 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
426 return false;
429 sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
430 m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
431 m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
432 m_aXRef[nAcroFormId].SetDirty(true);
433 m_aEditBuffer.WriteUInt32AsString(nAcroFormId);
434 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
436 // If this is nullptr, then the AcroForm object is not in an object stream.
437 SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
439 if (!pAcroFormObject->Lookup("Fields"))
441 SAL_WARN("vcl.filter",
442 "PDFDocument::Sign: AcroForm object without required Fields key");
443 return false;
446 PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
447 if (!pAcroFormDictionary)
449 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
450 return false;
453 // Offset right before the end of the Fields array.
454 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
455 + pAcroFormDictionary->GetKeyValueLength("Fields")
456 - strlen("]");
457 // Length of beginning of the object dictionary -> Fields end.
458 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
459 if (pStreamBuffer)
460 m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
461 else
463 nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
464 m_aEditBuffer.WriteCharPtr("<<");
465 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
466 + pAcroFormObject->GetDictionaryOffset(),
467 nFieldsBeforeEndLength);
470 // Append our reference at the end of the Fields array.
471 m_aEditBuffer.WriteCharPtr(" ");
472 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
473 m_aEditBuffer.WriteCharPtr(" 0 R");
475 // Length of Fields end -> end of the object dictionary.
476 if (pStreamBuffer)
478 sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
479 m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
480 + nFieldsEndOffset,
481 nFieldsAfterEndLength);
483 else
485 sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
486 + pAcroFormObject->GetDictionaryLength()
487 - nFieldsEndOffset;
488 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
489 + nFieldsEndOffset,
490 nFieldsAfterEndLength);
491 m_aEditBuffer.WriteCharPtr(">>");
494 m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
496 else
498 // Write the updated Catalog object, references nAnnotId.
499 auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
500 m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
501 m_aXRef[nCatalogId].SetDirty(true);
502 m_aEditBuffer.WriteUInt32AsString(nCatalogId);
503 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
504 m_aEditBuffer.WriteCharPtr("<<");
505 if (!pAcroFormDictionary)
507 // No AcroForm key, assume no signatures.
508 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
509 + pCatalog->GetDictionaryOffset(),
510 pCatalog->GetDictionaryLength());
511 m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n");
512 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
513 m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
515 else
517 // AcroForm key is already there, insert our reference at the Fields end.
518 auto it = pAcroFormDictionary->GetItems().find("Fields");
519 if (it == pAcroFormDictionary->GetItems().end())
521 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
522 return false;
525 auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
526 if (!pFields)
528 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
529 return false;
532 // Offset right before the end of the Fields array.
533 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
534 + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
535 // Length of beginning of the Catalog dictionary -> Fields end.
536 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
537 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
538 + pCatalog->GetDictionaryOffset(),
539 nFieldsBeforeEndLength);
540 m_aEditBuffer.WriteCharPtr(" ");
541 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
542 m_aEditBuffer.WriteCharPtr(" 0 R");
543 // Length of Fields end -> end of the Catalog dictionary.
544 sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
545 + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
546 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
547 + nFieldsEndOffset,
548 nFieldsAfterEndLength);
550 m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
553 return true;
556 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
558 if (m_pXRefStream)
560 // Write the xref stream.
561 // This is a bit meta: the xref stream stores its own offset.
562 sal_Int32 nXRefStreamId = m_aXRef.size();
563 XRefEntry aXRefStreamEntry;
564 aXRefStreamEntry.SetOffset(nXRefOffset);
565 aXRefStreamEntry.SetDirty(true);
566 m_aXRef[nXRefStreamId] = aXRefStreamEntry;
568 // Write stream data.
569 SvMemoryStream aXRefStream;
570 const size_t nOffsetLen = 3;
571 // 3 additional bytes: predictor, the first and the third field.
572 const size_t nLineLength = nOffsetLen + 3;
573 // This is the line as it appears before tweaking according to the predictor.
574 std::vector<unsigned char> aOrigLine(nLineLength);
575 // This is the previous line.
576 std::vector<unsigned char> aPrevLine(nLineLength);
577 // This is the line as written to the stream.
578 std::vector<unsigned char> aFilteredLine(nLineLength);
579 for (const auto& rXRef : m_aXRef)
581 const XRefEntry& rEntry = rXRef.second;
583 if (!rEntry.GetDirty())
584 continue;
586 // Predictor.
587 size_t nPos = 0;
588 // PNG prediction: up (on all rows).
589 aOrigLine[nPos++] = 2;
591 // First field.
592 unsigned char nType = 0;
593 switch (rEntry.GetType())
595 case XRefEntryType::FREE:
596 nType = 0;
597 break;
598 case XRefEntryType::NOT_COMPRESSED:
599 nType = 1;
600 break;
601 case XRefEntryType::COMPRESSED:
602 nType = 2;
603 break;
605 aOrigLine[nPos++] = nType;
607 // Second field.
608 for (size_t i = 0; i < nOffsetLen; ++i)
610 size_t nByte = nOffsetLen - i - 1;
611 // Fields requiring more than one byte are stored with the
612 // high-order byte first.
613 unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
614 aOrigLine[nPos++] = nCh;
617 // Third field.
618 aOrigLine[nPos++] = 0;
620 // Now apply the predictor.
621 aFilteredLine[0] = aOrigLine[0];
622 for (size_t i = 1; i < nLineLength; ++i)
624 // Count the delta vs the previous line.
625 aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
626 // Remember the new reference.
627 aPrevLine[i] = aOrigLine[i];
630 aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
633 m_aEditBuffer.WriteUInt32AsString(nXRefStreamId);
634 m_aEditBuffer.WriteCharPtr(
635 " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
637 // ID.
638 auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
639 if (pID)
641 const std::vector<PDFElement*>& rElements = pID->GetElements();
642 m_aEditBuffer.WriteCharPtr("/ID [ <");
643 for (size_t i = 0; i < rElements.size(); ++i)
645 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
646 if (!pIDString)
647 continue;
649 m_aEditBuffer.WriteOString(pIDString->GetValue());
650 if ((i + 1) < rElements.size())
651 m_aEditBuffer.WriteCharPtr("> <");
653 m_aEditBuffer.WriteCharPtr("> ] ");
656 // Index.
657 m_aEditBuffer.WriteCharPtr("/Index [ ");
658 for (const auto& rXRef : m_aXRef)
660 if (!rXRef.second.GetDirty())
661 continue;
663 m_aEditBuffer.WriteUInt32AsString(rXRef.first);
664 m_aEditBuffer.WriteCharPtr(" 1 ");
666 m_aEditBuffer.WriteCharPtr("] ");
668 // Info.
669 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
670 if (pInfo)
672 m_aEditBuffer.WriteCharPtr("/Info ");
673 m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
674 m_aEditBuffer.WriteCharPtr(" ");
675 m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
676 m_aEditBuffer.WriteCharPtr(" R ");
679 // Length.
680 m_aEditBuffer.WriteCharPtr("/Length ");
682 ZCodec aZCodec;
683 aZCodec.BeginCompression();
684 aXRefStream.Seek(0);
685 SvMemoryStream aStream;
686 aZCodec.Compress(aXRefStream, aStream);
687 aZCodec.EndCompression();
688 aXRefStream.Seek(0);
689 aXRefStream.SetStreamSize(0);
690 aStream.Seek(0);
691 aXRefStream.WriteStream(aStream);
693 m_aEditBuffer.WriteUInt32AsString(aXRefStream.GetSize());
695 if (!m_aStartXRefs.empty())
697 // Write location of the previous cross-reference section.
698 m_aEditBuffer.WriteCharPtr("/Prev ");
699 m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
702 // Root.
703 m_aEditBuffer.WriteCharPtr("/Root ");
704 m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
705 m_aEditBuffer.WriteCharPtr(" ");
706 m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
707 m_aEditBuffer.WriteCharPtr(" R ");
709 // Size.
710 m_aEditBuffer.WriteCharPtr("/Size ");
711 m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
713 m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
714 aXRefStream.Seek(0);
715 m_aEditBuffer.WriteStream(aXRefStream);
716 m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
718 else
720 // Write the xref table.
721 m_aEditBuffer.WriteCharPtr("xref\n");
722 for (const auto& rXRef : m_aXRef)
724 size_t nObject = rXRef.first;
725 size_t nOffset = rXRef.second.GetOffset();
726 if (!rXRef.second.GetDirty())
727 continue;
729 m_aEditBuffer.WriteUInt32AsString(nObject);
730 m_aEditBuffer.WriteCharPtr(" 1\n");
731 OStringBuffer aBuffer;
732 aBuffer.append(static_cast<sal_Int32>(nOffset));
733 while (aBuffer.getLength() < 10)
734 aBuffer.insert(0, "0");
735 if (nObject == 0)
736 aBuffer.append(" 65535 f \n");
737 else
738 aBuffer.append(" 00000 n \n");
739 m_aEditBuffer.WriteOString(aBuffer.toString());
742 // Write the trailer.
743 m_aEditBuffer.WriteCharPtr("trailer\n<</Size ");
744 m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
745 m_aEditBuffer.WriteCharPtr("/Root ");
746 m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
747 m_aEditBuffer.WriteCharPtr(" ");
748 m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
749 m_aEditBuffer.WriteCharPtr(" R\n");
750 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
751 if (pInfo)
753 m_aEditBuffer.WriteCharPtr("/Info ");
754 m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
755 m_aEditBuffer.WriteCharPtr(" ");
756 m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
757 m_aEditBuffer.WriteCharPtr(" R\n");
759 auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
760 if (pID)
762 const std::vector<PDFElement*>& rElements = pID->GetElements();
763 m_aEditBuffer.WriteCharPtr("/ID [ <");
764 for (size_t i = 0; i < rElements.size(); ++i)
766 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
767 if (!pIDString)
768 continue;
770 m_aEditBuffer.WriteOString(pIDString->GetValue());
771 if ((i + 1) < rElements.size())
772 m_aEditBuffer.WriteCharPtr(">\n<");
774 m_aEditBuffer.WriteCharPtr("> ]\n");
777 if (!m_aStartXRefs.empty())
779 // Write location of the previous cross-reference section.
780 m_aEditBuffer.WriteCharPtr("/Prev ");
781 m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
784 m_aEditBuffer.WriteCharPtr(">>\n");
788 bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate,
789 const OUString& rDescription, bool bAdES)
791 m_aEditBuffer.Seek(STREAM_SEEK_TO_END);
792 m_aEditBuffer.WriteCharPtr("\n");
794 sal_uInt64 nSignatureLastByteRangeOffset = 0;
795 sal_Int64 nSignatureContentOffset = 0;
796 sal_Int32 nSignatureId = WriteSignatureObject(
797 rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
799 sal_Int32 nAppearanceId = WriteAppearanceObject();
801 std::vector<PDFObjectElement*> aPages = GetPages();
802 if (aPages.empty() || !aPages[0])
804 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
805 return false;
808 PDFObjectElement& rFirstPage = *aPages[0];
809 sal_Int32 nAnnotId = WriteAnnotObject(rFirstPage, nSignatureId, nAppearanceId);
811 if (!WritePageObject(rFirstPage, nAnnotId))
813 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
814 return false;
817 PDFReferenceElement* pRoot = nullptr;
818 if (!WriteCatalogObject(nAnnotId, pRoot))
820 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
821 return false;
824 sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
825 WriteXRef(nXRefOffset, pRoot);
827 // Write startxref.
828 m_aEditBuffer.WriteCharPtr("startxref\n");
829 m_aEditBuffer.WriteUInt32AsString(nXRefOffset);
830 m_aEditBuffer.WriteCharPtr("\n%%EOF\n");
832 // Finalize the signature, now that we know the total file size.
833 // Calculate the length of the last byte range.
834 sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
835 sal_Int64 nLastByteRangeLength
836 = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
837 // Write the length to the buffer.
838 m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
839 OString aByteRangeBuffer = OString::number(nLastByteRangeLength) + " ]";
840 m_aEditBuffer.WriteOString(aByteRangeBuffer);
842 // Create the PKCS#7 object.
843 css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
844 if (!aDerEncoded.hasElements())
846 SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
847 return false;
850 m_aEditBuffer.Seek(0);
851 sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
852 std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
853 m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
855 m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
856 sal_uInt64 nBufferSize2 = nLastByteRangeLength;
857 std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
858 m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
860 OStringBuffer aCMSHexBuffer;
861 svl::crypto::Signing aSigning(xCertificate);
862 aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
863 aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
864 if (!aSigning.Sign(aCMSHexBuffer))
866 SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
867 return false;
870 assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
872 m_aEditBuffer.Seek(nSignatureContentOffset);
873 m_aEditBuffer.WriteOString(aCMSHexBuffer.toString());
875 return true;
878 bool PDFDocument::Write(SvStream& rStream)
880 m_aEditBuffer.Seek(0);
881 rStream.WriteStream(m_aEditBuffer);
882 return rStream.good();
885 bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
886 std::vector<std::unique_ptr<PDFElement>>& rElements,
887 PDFObjectElement* pObjectElement)
889 // Last seen object token.
890 PDFObjectElement* pObject = pObjectElement;
891 PDFNameElement* pObjectKey = nullptr;
892 PDFObjectElement* pObjectStream = nullptr;
893 bool bInXRef = false;
894 // The next number will be an xref offset.
895 bool bInStartXRef = false;
896 // Dictionary depth, so we know when we're outside any dictionaries.
897 int nDictionaryDepth = 0;
898 // Array depth, only the offset/length of the toplevel array is tracked.
899 int nArrayDepth = 0;
900 // Last seen array token that's outside any dictionaries.
901 PDFArrayElement* pArray = nullptr;
902 // If we're inside an obj/endobj pair.
903 bool bInObject = false;
904 while (true)
906 char ch;
907 rStream.ReadChar(ch);
908 if (rStream.eof())
909 break;
911 switch (ch)
913 case '%':
915 auto pComment = new PDFCommentElement(*this);
916 rElements.push_back(std::unique_ptr<PDFElement>(pComment));
917 rStream.SeekRel(-1);
918 if (!rElements.back()->Read(rStream))
920 SAL_WARN("vcl.filter",
921 "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
922 return false;
924 if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
925 && m_aEOFs.back() == rStream.Tell())
927 // Found EOF and partial parsing requested, we're done.
928 return true;
930 break;
932 case '<':
934 // Dictionary or hex string.
935 rStream.ReadChar(ch);
936 rStream.SeekRel(-2);
937 if (ch == '<')
939 rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
940 ++nDictionaryDepth;
942 else
943 rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
944 if (!rElements.back()->Read(rStream))
946 SAL_WARN("vcl.filter",
947 "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
948 return false;
950 break;
952 case '>':
954 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
955 --nDictionaryDepth;
956 rStream.SeekRel(-1);
957 if (!rElements.back()->Read(rStream))
959 SAL_WARN("vcl.filter",
960 "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
961 return false;
963 break;
965 case '[':
967 auto pArr = new PDFArrayElement(pObject);
968 rElements.push_back(std::unique_ptr<PDFElement>(pArr));
969 if (nDictionaryDepth == 0 && nArrayDepth == 0)
971 // The array is attached directly, inform the object.
972 pArray = pArr;
973 if (pObject)
975 pObject->SetArray(pArray);
976 pObject->SetArrayOffset(rStream.Tell());
979 ++nArrayDepth;
980 rStream.SeekRel(-1);
981 if (!rElements.back()->Read(rStream))
983 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
984 return false;
986 break;
988 case ']':
990 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
991 --nArrayDepth;
992 if (nArrayDepth == 0)
993 pArray = nullptr;
994 rStream.SeekRel(-1);
995 if (nDictionaryDepth == 0 && nArrayDepth == 0)
997 if (pObject)
999 pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
1002 if (!rElements.back()->Read(rStream))
1004 SAL_WARN("vcl.filter",
1005 "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1006 return false;
1008 break;
1010 case '/':
1012 auto pNameElement = new PDFNameElement();
1013 rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
1014 rStream.SeekRel(-1);
1015 if (!pNameElement->Read(rStream))
1017 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1018 return false;
1020 if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
1021 && pNameElement->GetValue() == "ObjStm")
1022 pObjectStream = pObject;
1023 else
1024 pObjectKey = pNameElement;
1025 break;
1027 case '(':
1029 rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1030 rStream.SeekRel(-1);
1031 if (!rElements.back()->Read(rStream))
1033 SAL_WARN("vcl.filter",
1034 "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1035 return false;
1037 break;
1039 default:
1041 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-')
1043 // Numbering object: an integer or a real.
1044 auto pNumberElement = new PDFNumberElement();
1045 rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1046 rStream.SeekRel(-1);
1047 if (!pNumberElement->Read(rStream))
1049 SAL_WARN("vcl.filter",
1050 "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1051 return false;
1053 if (bInStartXRef)
1055 bInStartXRef = false;
1056 m_aStartXRefs.push_back(pNumberElement->GetValue());
1058 auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1059 if (it != m_aOffsetObjects.end())
1060 m_pXRefStream = it->second;
1062 else if (bInObject && !nDictionaryDepth && !nArrayDepth && pObject)
1063 // Number element inside an object, but outside a
1064 // dictionary / array: remember it.
1065 pObject->SetNumberElement(pNumberElement);
1067 else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1069 // Possible keyword, like "obj".
1070 rStream.SeekRel(-1);
1071 OString aKeyword = ReadKeyword(rStream);
1073 bool bObj = aKeyword == "obj";
1074 if (bObj || aKeyword == "R")
1076 size_t nElements = rElements.size();
1077 if (nElements < 2)
1079 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1080 "tokens before 'obj' or 'R' keyword");
1081 return false;
1084 auto pObjectNumber
1085 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1086 auto pGenerationNumber
1087 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1088 if (!pObjectNumber || !pGenerationNumber)
1090 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1091 "generation number before 'obj' or 'R' keyword");
1092 return false;
1095 if (bObj)
1097 pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
1098 pGenerationNumber->GetValue());
1099 rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1100 m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1101 m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1102 bInObject = true;
1104 else
1106 auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
1107 *pGenerationNumber);
1108 rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1109 if (pArray)
1110 // Reference is part of a direct (non-dictionary) array, inform the array.
1111 pArray->PushBack(rElements.back().get());
1112 if (bInObject && nDictionaryDepth > 0 && pObject)
1113 // Inform the object about a new in-dictionary reference.
1114 pObject->AddDictionaryReference(pReference);
1116 if (!rElements.back()->Read(rStream))
1118 SAL_WARN("vcl.filter",
1119 "PDFDocument::Tokenize: PDFElement::Read() failed");
1120 return false;
1123 else if (aKeyword == "stream")
1125 // Look up the length of the stream from the parent object's dictionary.
1126 size_t nLength = 0;
1127 for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1129 // Iterate in reverse order.
1130 size_t nIndex = rElements.size() - nElement - 1;
1131 PDFElement* pElement = rElements[nIndex].get();
1132 auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1133 if (!pObj)
1134 continue;
1136 PDFElement* pLookup = pObj->Lookup("Length");
1137 auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1138 if (pReference)
1140 // Length is provided as a reference.
1141 nLength = pReference->LookupNumber(rStream);
1142 break;
1145 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1146 if (pNumber)
1148 // Length is provided directly.
1149 nLength = pNumber->GetValue();
1150 break;
1153 SAL_WARN(
1154 "vcl.filter",
1155 "PDFDocument::Tokenize: found no Length key for stream keyword");
1156 return false;
1159 PDFDocument::SkipLineBreaks(rStream);
1160 auto pStreamElement = new PDFStreamElement(nLength);
1161 if (pObject)
1162 pObject->SetStream(pStreamElement);
1163 rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1164 if (!rElements.back()->Read(rStream))
1166 SAL_WARN("vcl.filter",
1167 "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1168 return false;
1171 else if (aKeyword == "endstream")
1173 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1174 if (!rElements.back()->Read(rStream))
1176 SAL_WARN("vcl.filter",
1177 "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1178 return false;
1181 else if (aKeyword == "endobj")
1183 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1184 if (!rElements.back()->Read(rStream))
1186 SAL_WARN("vcl.filter",
1187 "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1188 return false;
1190 if (eMode == TokenizeMode::END_OF_OBJECT)
1192 // Found endobj and only object parsing was requested, we're done.
1193 return true;
1196 if (pObjectStream)
1198 // We're at the end of an object stream, parse the stored objects.
1199 pObjectStream->ParseStoredObjects();
1200 pObjectStream = nullptr;
1201 pObjectKey = nullptr;
1203 bInObject = false;
1205 else if (aKeyword == "true" || aKeyword == "false")
1206 rElements.push_back(std::unique_ptr<PDFElement>(
1207 new PDFBooleanElement(aKeyword.toBoolean())));
1208 else if (aKeyword == "null")
1209 rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1210 else if (aKeyword == "xref")
1211 // Allow 'f' and 'n' keywords.
1212 bInXRef = true;
1213 else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1216 else if (aKeyword == "trailer")
1218 auto pTrailer = new PDFTrailerElement(*this);
1220 // Make it possible to find this trailer later by offset.
1221 pTrailer->Read(rStream);
1222 m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1224 // When reading till the first EOF token only, remember
1225 // just the first trailer token.
1226 if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
1227 m_pTrailer = pTrailer;
1228 rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1230 else if (aKeyword == "startxref")
1232 bInStartXRef = true;
1234 else
1236 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1237 << aKeyword << "' keyword at byte position "
1238 << rStream.Tell());
1239 return false;
1242 else
1244 if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1246 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected character: "
1247 << ch << " at byte position " << rStream.Tell());
1248 return false;
1251 break;
1256 return true;
1259 void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
1261 m_aIDObjects[nID] = pObject;
1264 bool PDFDocument::Read(SvStream& rStream)
1266 // Check file magic.
1267 std::vector<sal_Int8> aHeader(5);
1268 rStream.Seek(0);
1269 rStream.ReadBytes(aHeader.data(), aHeader.size());
1270 if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1271 || aHeader[4] != '-')
1273 SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1274 return false;
1277 // Allow later editing of the contents in-memory.
1278 rStream.Seek(0);
1279 m_aEditBuffer.WriteStream(rStream);
1281 // Look up the offset of the xref table.
1282 size_t nStartXRef = FindStartXRef(rStream);
1283 SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1284 if (nStartXRef == 0)
1286 SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1287 return false;
1289 while (true)
1291 rStream.Seek(nStartXRef);
1292 OString aKeyword = ReadKeyword(rStream);
1293 if (aKeyword.isEmpty())
1294 ReadXRefStream(rStream);
1296 else
1298 if (aKeyword != "xref")
1300 SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1301 return false;
1303 ReadXRef(rStream);
1304 if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1306 SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1307 return false;
1311 PDFNumberElement* pPrev = nullptr;
1312 if (m_pTrailer)
1314 pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1316 // Remember the offset of this trailer in the correct order. It's
1317 // possible that newer trailers don't have a larger offset.
1318 m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
1320 else if (m_pXRefStream)
1321 pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1322 if (pPrev)
1323 nStartXRef = pPrev->GetValue();
1325 // Reset state, except the edit buffer.
1326 m_aElements.clear();
1327 m_aOffsetObjects.clear();
1328 m_aIDObjects.clear();
1329 m_aStartXRefs.clear();
1330 m_aEOFs.clear();
1331 m_pTrailer = nullptr;
1332 m_pXRefStream = nullptr;
1333 if (!pPrev)
1334 break;
1337 // Then we can tokenize the stream.
1338 rStream.Seek(0);
1339 return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1342 OString PDFDocument::ReadKeyword(SvStream& rStream)
1344 OStringBuffer aBuf;
1345 char ch;
1346 rStream.ReadChar(ch);
1347 if (rStream.eof())
1348 return OString();
1349 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1351 aBuf.append(ch);
1352 rStream.ReadChar(ch);
1353 if (rStream.eof())
1354 return aBuf.toString();
1356 rStream.SeekRel(-1);
1357 return aBuf.toString();
1360 size_t PDFDocument::FindStartXRef(SvStream& rStream)
1362 // Find the "startxref" token, somewhere near the end of the document.
1363 std::vector<char> aBuf(1024);
1364 rStream.Seek(STREAM_SEEK_TO_END);
1365 if (rStream.Tell() > aBuf.size())
1366 rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1367 else
1368 // The document is really short, then just read it from the start.
1369 rStream.Seek(0);
1370 size_t nBeforePeek = rStream.Tell();
1371 size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1372 rStream.Seek(nBeforePeek);
1373 if (nSize != aBuf.size())
1374 aBuf.resize(nSize);
1375 OString aPrefix("startxref");
1376 // Find the last startxref at the end of the document.
1377 auto itLastValid = aBuf.end();
1378 auto it = aBuf.begin();
1379 while (true)
1381 it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1382 if (it == aBuf.end())
1383 break;
1385 itLastValid = it;
1386 ++it;
1388 if (itLastValid == aBuf.end())
1390 SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1391 return 0;
1394 rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1395 if (rStream.eof())
1397 SAL_WARN("vcl.filter",
1398 "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1399 return 0;
1402 PDFDocument::SkipWhitespace(rStream);
1403 PDFNumberElement aNumber;
1404 if (!aNumber.Read(rStream))
1405 return 0;
1406 return aNumber.GetValue();
1409 void PDFDocument::ReadXRefStream(SvStream& rStream)
1411 // Look up the stream length in the object dictionary.
1412 if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1414 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1415 return;
1418 if (m_aElements.empty())
1420 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1421 return;
1424 PDFObjectElement* pObject = nullptr;
1425 for (const auto& pElement : m_aElements)
1427 if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1429 pObject = pObj;
1430 break;
1433 if (!pObject)
1435 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1436 return;
1439 // So that the Prev key can be looked up later.
1440 m_pXRefStream = pObject;
1442 PDFElement* pLookup = pObject->Lookup("Length");
1443 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1444 if (!pNumber)
1446 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1447 return;
1449 sal_uInt64 nLength = pNumber->GetValue();
1451 // Look up the stream offset.
1452 PDFStreamElement* pStream = nullptr;
1453 for (const auto& pElement : m_aElements)
1455 if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1457 pStream = pS;
1458 break;
1461 if (!pStream)
1463 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1464 return;
1467 // Read and decompress it.
1468 rStream.Seek(pStream->GetOffset());
1469 std::vector<char> aBuf(nLength);
1470 rStream.ReadBytes(aBuf.data(), aBuf.size());
1472 auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1473 if (!pFilter)
1475 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1476 return;
1479 if (pFilter->GetValue() != "FlateDecode")
1481 SAL_WARN("vcl.filter",
1482 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1483 return;
1486 int nColumns = 1;
1487 int nPredictor = 1;
1488 if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1490 const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1491 auto it = rItems.find("Columns");
1492 if (it != rItems.end())
1493 if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1494 nColumns = pColumns->GetValue();
1495 it = rItems.find("Predictor");
1496 if (it != rItems.end())
1497 if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1498 nPredictor = pPredictor->GetValue();
1501 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1502 SvMemoryStream aStream;
1503 ZCodec aZCodec;
1504 aZCodec.BeginCompression();
1505 aZCodec.Decompress(aSource, aStream);
1506 if (!aZCodec.EndCompression())
1508 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1509 return;
1512 // Look up the first and the last entry we need to read.
1513 auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
1514 std::vector<size_t> aFirstObjects;
1515 std::vector<size_t> aNumberOfObjects;
1516 if (!pIndex)
1518 auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1519 if (pSize)
1521 aFirstObjects.push_back(0);
1522 aNumberOfObjects.push_back(pSize->GetValue());
1524 else
1526 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1527 return;
1530 else
1532 const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1533 size_t nFirstObject = 0;
1534 for (size_t i = 0; i < rIndexElements.size(); ++i)
1536 if (i % 2 == 0)
1538 auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1539 if (!pFirstObject)
1541 SAL_WARN("vcl.filter",
1542 "PDFDocument::ReadXRefStream: Index has no first object");
1543 return;
1545 nFirstObject = pFirstObject->GetValue();
1546 continue;
1549 auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1550 if (!pNumberOfObjects)
1552 SAL_WARN("vcl.filter",
1553 "PDFDocument::ReadXRefStream: Index has no number of objects");
1554 return;
1556 aFirstObjects.push_back(nFirstObject);
1557 aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1561 // Look up the format of a single entry.
1562 const int nWSize = 3;
1563 auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1564 if (!pW || pW->GetElements().size() < nWSize)
1566 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1567 return;
1569 int aW[nWSize];
1570 // First character is the (kind of) repeated predictor.
1571 int nLineLength = 1;
1572 for (size_t i = 0; i < nWSize; ++i)
1574 auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1575 if (!pI)
1577 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1578 return;
1580 aW[i] = pI->GetValue();
1581 nLineLength += aW[i];
1584 if (nPredictor > 1 && nLineLength - 1 != nColumns)
1586 SAL_WARN("vcl.filter",
1587 "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1588 return;
1591 aStream.Seek(0);
1592 for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1594 size_t nFirstObject = aFirstObjects[nSubSection];
1595 size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1597 // This is the line as read from the stream.
1598 std::vector<unsigned char> aOrigLine(nLineLength);
1599 // This is the line as it appears after tweaking according to nPredictor.
1600 std::vector<unsigned char> aFilteredLine(nLineLength);
1601 for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1603 size_t nIndex = nFirstObject + nEntry;
1605 aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
1606 if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1608 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1609 "inconsistent with /DecodeParms/Predictor for object #"
1610 << nIndex);
1611 return;
1614 for (int i = 0; i < nLineLength; ++i)
1616 switch (nPredictor)
1618 case 1:
1619 // No prediction.
1620 break;
1621 case 12:
1622 // PNG prediction: up (on all rows).
1623 aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1624 break;
1625 default:
1626 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1627 << nPredictor);
1628 return;
1629 break;
1633 // First character is already handled above.
1634 int nPos = 1;
1635 size_t nType = 0;
1636 // Start of the current field in the stream data.
1637 int nOffset = nPos;
1638 for (; nPos < nOffset + aW[0]; ++nPos)
1640 unsigned char nCh = aFilteredLine[nPos];
1641 nType = (nType << 8) + nCh;
1644 // Start of the object in the file stream.
1645 size_t nStreamOffset = 0;
1646 nOffset = nPos;
1647 for (; nPos < nOffset + aW[1]; ++nPos)
1649 unsigned char nCh = aFilteredLine[nPos];
1650 nStreamOffset = (nStreamOffset << 8) + nCh;
1653 // Generation number of the object.
1654 size_t nGenerationNumber = 0;
1655 nOffset = nPos;
1656 for (; nPos < nOffset + aW[2]; ++nPos)
1658 unsigned char nCh = aFilteredLine[nPos];
1659 nGenerationNumber = (nGenerationNumber << 8) + nCh;
1662 // Ignore invalid nType.
1663 if (nType <= 2)
1665 if (m_aXRef.find(nIndex) == m_aXRef.end())
1667 XRefEntry aEntry;
1668 switch (nType)
1670 case 0:
1671 aEntry.SetType(XRefEntryType::FREE);
1672 break;
1673 case 1:
1674 aEntry.SetType(XRefEntryType::NOT_COMPRESSED);
1675 break;
1676 case 2:
1677 aEntry.SetType(XRefEntryType::COMPRESSED);
1678 break;
1680 aEntry.SetOffset(nStreamOffset);
1681 m_aXRef[nIndex] = aEntry;
1688 void PDFDocument::ReadXRef(SvStream& rStream)
1690 PDFDocument::SkipWhitespace(rStream);
1692 while (true)
1694 PDFNumberElement aFirstObject;
1695 if (!aFirstObject.Read(rStream))
1697 // Next token is not a number, it'll be the trailer.
1698 return;
1701 if (aFirstObject.GetValue() < 0)
1703 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1704 return;
1707 PDFDocument::SkipWhitespace(rStream);
1708 PDFNumberElement aNumberOfEntries;
1709 if (!aNumberOfEntries.Read(rStream))
1711 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1712 return;
1715 if (aNumberOfEntries.GetValue() < 0)
1717 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1718 return;
1721 size_t nSize = aNumberOfEntries.GetValue();
1722 for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1724 size_t nIndex = aFirstObject.GetValue() + nEntry;
1725 PDFDocument::SkipWhitespace(rStream);
1726 PDFNumberElement aOffset;
1727 if (!aOffset.Read(rStream))
1729 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1730 return;
1733 PDFDocument::SkipWhitespace(rStream);
1734 PDFNumberElement aGenerationNumber;
1735 if (!aGenerationNumber.Read(rStream))
1737 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1738 return;
1741 PDFDocument::SkipWhitespace(rStream);
1742 OString aKeyword = ReadKeyword(rStream);
1743 if (aKeyword != "f" && aKeyword != "n")
1745 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1746 return;
1748 // xrefs are read in reverse order, so never update an existing
1749 // offset with an older one.
1750 if (m_aXRef.find(nIndex) == m_aXRef.end())
1752 XRefEntry aEntry;
1753 aEntry.SetOffset(aOffset.GetValue());
1754 // Initially only the first entry is dirty.
1755 if (nIndex == 0)
1756 aEntry.SetDirty(true);
1757 m_aXRef[nIndex] = aEntry;
1759 PDFDocument::SkipWhitespace(rStream);
1764 void PDFDocument::SkipWhitespace(SvStream& rStream)
1766 char ch = 0;
1768 while (true)
1770 rStream.ReadChar(ch);
1771 if (rStream.eof())
1772 break;
1774 if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1776 rStream.SeekRel(-1);
1777 return;
1782 void PDFDocument::SkipLineBreaks(SvStream& rStream)
1784 char ch = 0;
1786 while (true)
1788 rStream.ReadChar(ch);
1789 if (rStream.eof())
1790 break;
1792 if (ch != '\n' && ch != '\r')
1794 rStream.SeekRel(-1);
1795 return;
1800 size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1802 auto it = m_aXRef.find(nIndex);
1803 if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1805 SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1806 << nIndex << ", but failed");
1807 return 0;
1810 return it->second.GetOffset();
1813 const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1815 return m_aElements;
1818 /// Visits the page tree recursively, looking for page objects.
1819 static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1821 auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids"));
1822 if (!pKids)
1824 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1825 return;
1828 pPages->setVisiting(true);
1830 for (const auto& pKid : pKids->GetElements())
1832 auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1833 if (!pReference)
1834 continue;
1836 PDFObjectElement* pKidObject = pReference->LookupObject();
1837 if (!pKidObject)
1838 continue;
1840 // detect if visiting reenters itself
1841 if (pKidObject->alreadyVisiting())
1843 SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1844 continue;
1847 auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
1848 if (pName && pName->GetValue() == "Pages")
1849 // Pages inside pages: recurse.
1850 visitPages(pKidObject, rRet);
1851 else
1852 // Found an actual page.
1853 rRet.push_back(pKidObject);
1856 pPages->setVisiting(false);
1859 std::vector<PDFObjectElement*> PDFDocument::GetPages()
1861 std::vector<PDFObjectElement*> aRet;
1863 PDFReferenceElement* pRoot = nullptr;
1865 PDFTrailerElement* pTrailer = nullptr;
1866 if (!m_aTrailerOffsets.empty())
1868 // Get access to the latest trailer, and work with the keys of that
1869 // one.
1870 auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
1871 if (it != m_aOffsetTrailers.end())
1872 pTrailer = it->second;
1875 if (pTrailer)
1876 pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
1877 else if (m_pXRefStream)
1878 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
1880 if (!pRoot)
1882 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no Root key");
1883 return aRet;
1886 PDFObjectElement* pCatalog = pRoot->LookupObject();
1887 if (!pCatalog)
1889 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1890 return aRet;
1893 PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
1894 if (!pPages)
1896 SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
1897 << ") has no pages");
1898 return aRet;
1901 visitPages(pPages, aRet);
1903 return aRet;
1906 void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
1908 std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
1910 std::vector<PDFObjectElement*> aRet;
1912 std::vector<PDFObjectElement*> aPages = GetPages();
1914 for (const auto& pPage : aPages)
1916 if (!pPage)
1917 continue;
1919 PDFElement* pAnnotsElement = pPage->Lookup("Annots");
1920 auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
1921 if (!pAnnots)
1923 // Annots is not an array, see if it's a reference to an object
1924 // with a direct array.
1925 auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
1926 if (pAnnotsRef)
1928 if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
1930 pAnnots = pAnnotsObject->GetArray();
1935 if (!pAnnots)
1936 continue;
1938 for (const auto& pAnnot : pAnnots->GetElements())
1940 auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
1941 if (!pReference)
1942 continue;
1944 PDFObjectElement* pAnnotObject = pReference->LookupObject();
1945 if (!pAnnotObject)
1946 continue;
1948 auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
1949 if (!pFT || pFT->GetValue() != "Sig")
1950 continue;
1952 aRet.push_back(pAnnotObject);
1956 return aRet;
1959 std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
1961 return svl::crypto::DecodeHexString(pElement->GetValue());
1964 PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)
1965 : m_rDoc(rDoc)
1969 bool PDFCommentElement::Read(SvStream& rStream)
1971 // Read from (including) the % char till (excluding) the end of the line/stream.
1972 OStringBuffer aBuf;
1973 char ch;
1974 rStream.ReadChar(ch);
1975 while (true)
1977 if (ch == '\n' || ch == '\r' || rStream.eof())
1979 m_aComment = aBuf.makeStringAndClear();
1981 if (m_aComment.startsWith("%%EOF"))
1982 m_rDoc.PushBackEOF(rStream.Tell());
1984 SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
1985 return true;
1987 aBuf.append(ch);
1988 rStream.ReadChar(ch);
1991 return false;
1994 PDFNumberElement::PDFNumberElement() = default;
1996 bool PDFNumberElement::Read(SvStream& rStream)
1998 OStringBuffer aBuf;
1999 m_nOffset = rStream.Tell();
2000 char ch;
2001 rStream.ReadChar(ch);
2002 if (rStream.eof())
2004 return false;
2006 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '.')
2008 rStream.SeekRel(-1);
2009 return false;
2011 while (!rStream.eof())
2013 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '.')
2015 rStream.SeekRel(-1);
2016 m_nLength = rStream.Tell() - m_nOffset;
2017 m_fValue = aBuf.makeStringAndClear().toDouble();
2018 SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2019 return true;
2021 aBuf.append(ch);
2022 rStream.ReadChar(ch);
2025 return false;
2028 sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2030 sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2032 PDFBooleanElement::PDFBooleanElement(bool /*bValue*/) {}
2034 bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2036 bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2038 bool PDFHexStringElement::Read(SvStream& rStream)
2040 char ch;
2041 rStream.ReadChar(ch);
2042 if (ch != '<')
2044 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2045 return false;
2047 rStream.ReadChar(ch);
2049 OStringBuffer aBuf;
2050 while (!rStream.eof())
2052 if (ch == '>')
2054 m_aValue = aBuf.makeStringAndClear();
2055 SAL_INFO("vcl.filter",
2056 "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2057 return true;
2059 aBuf.append(ch);
2060 rStream.ReadChar(ch);
2063 return false;
2066 const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2068 bool PDFLiteralStringElement::Read(SvStream& rStream)
2070 char nPrevCh = 0;
2071 char ch = 0;
2072 rStream.ReadChar(ch);
2073 if (ch != '(')
2075 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2076 return false;
2078 nPrevCh = ch;
2079 rStream.ReadChar(ch);
2081 // Start with 1 nesting level as we read a '(' above already.
2082 int nDepth = 1;
2083 OStringBuffer aBuf;
2084 while (!rStream.eof())
2086 if (ch == '(' && nPrevCh != '\\')
2087 ++nDepth;
2089 if (ch == ')' && nPrevCh != '\\')
2090 --nDepth;
2092 if (nDepth == 0)
2094 // ')' of the outermost '(' is reached.
2095 m_aValue = aBuf.makeStringAndClear();
2096 SAL_INFO("vcl.filter",
2097 "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2098 return true;
2100 aBuf.append(ch);
2101 nPrevCh = ch;
2102 rStream.ReadChar(ch);
2105 return false;
2108 const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2110 PDFTrailerElement::PDFTrailerElement(PDFDocument& rDoc)
2111 : m_rDoc(rDoc)
2115 bool PDFTrailerElement::Read(SvStream& rStream)
2117 m_nOffset = rStream.Tell();
2118 return true;
2121 PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2123 if (m_aDictionary.empty())
2124 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2126 return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2129 sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2131 double PDFNumberElement::GetValue() const { return m_fValue; }
2133 PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2134 : m_rDoc(rDoc)
2135 , m_fObjectValue(fObjectValue)
2136 , m_fGenerationValue(fGenerationValue)
2137 , m_pNumberElement(nullptr)
2138 , m_nDictionaryOffset(0)
2139 , m_nDictionaryLength(0)
2140 , m_pDictionaryElement(nullptr)
2141 , m_nArrayOffset(0)
2142 , m_nArrayLength(0)
2143 , m_pArrayElement(nullptr)
2144 , m_pStreamElement(nullptr)
2148 bool PDFObjectElement::Read(SvStream& /*rStream*/)
2150 SAL_INFO("vcl.filter",
2151 "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2152 return true;
2155 PDFDictionaryElement::PDFDictionaryElement() = default;
2157 size_t PDFDictionaryElement::Parse(const std::vector<std::unique_ptr<PDFElement>>& rElements,
2158 PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary)
2160 // The index of last parsed element, in case of nested dictionaries.
2161 size_t nRet = 0;
2163 if (!rDictionary.empty())
2164 return nRet;
2166 pThis->setParsing(true);
2168 auto pThisObject = dynamic_cast<PDFObjectElement*>(pThis);
2169 // This is set to non-nullptr here for nested dictionaries only.
2170 auto pThisDictionary = dynamic_cast<PDFDictionaryElement*>(pThis);
2172 // Find out where the dictionary for this object starts.
2173 size_t nIndex = 0;
2174 for (size_t i = 0; i < rElements.size(); ++i)
2176 if (rElements[i].get() == pThis)
2178 nIndex = i;
2179 break;
2183 OString aName;
2184 sal_uInt64 nNameOffset = 0;
2185 std::vector<PDFNumberElement*> aNumbers;
2186 // The array value we're in -- if any.
2187 PDFArrayElement* pArray = nullptr;
2188 sal_uInt64 nDictionaryOffset = 0;
2189 int nDictionaryDepth = 0;
2190 // Toplevel dictionary found (not inside an array).
2191 bool bDictionaryFound = false;
2192 // Toplevel array found (not inside a dictionary).
2193 bool bArrayFound = false;
2194 for (size_t i = nIndex; i < rElements.size(); ++i)
2196 // Dictionary tokens can be nested, track enter/leave.
2197 if (auto pDictionary = dynamic_cast<PDFDictionaryElement*>(rElements[i].get()))
2199 bDictionaryFound = true;
2200 if (++nDictionaryDepth == 1)
2202 // First dictionary start, track start offset.
2203 nDictionaryOffset = pDictionary->m_nLocation;
2204 if (pThisObject)
2206 if (!bArrayFound)
2207 // Then the toplevel dictionary of the object.
2208 pThisObject->SetDictionary(pDictionary);
2209 pThisDictionary = pDictionary;
2210 pThisObject->SetDictionaryOffset(nDictionaryOffset);
2213 else if (!pDictionary->alreadyParsing())
2215 // Nested dictionary.
2216 const size_t nexti
2217 = PDFDictionaryElement::Parse(rElements, pDictionary, pDictionary->m_aItems);
2218 if (nexti >= i) // ensure we go forwards and not endlessly loop
2220 i = nexti;
2221 rDictionary[aName] = pDictionary;
2222 aName.clear();
2227 if (auto pEndDictionary = dynamic_cast<PDFEndDictionaryElement*>(rElements[i].get()))
2229 if (--nDictionaryDepth == 0)
2231 // Last dictionary end, track length and stop parsing.
2232 if (pThisObject)
2233 pThisObject->SetDictionaryLength(pEndDictionary->GetLocation()
2234 - nDictionaryOffset);
2235 nRet = i;
2236 break;
2240 auto pName = dynamic_cast<PDFNameElement*>(rElements[i].get());
2241 if (pName)
2243 if (!aNumbers.empty())
2245 PDFNumberElement* pNumber = aNumbers.back();
2246 rDictionary[aName] = pNumber;
2247 if (pThisDictionary)
2249 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2250 pThisDictionary->SetKeyValueLength(
2251 aName, pNumber->GetLocation() + pNumber->GetLength() - nNameOffset);
2253 aName.clear();
2254 aNumbers.clear();
2257 if (aName.isEmpty())
2259 // Remember key.
2260 aName = pName->GetValue();
2261 nNameOffset = pName->GetLocation();
2263 else
2265 if (pArray)
2267 if (bDictionaryFound)
2268 // Array inside dictionary.
2269 pArray->PushBack(pName);
2271 else
2273 // Name-name key-value.
2274 rDictionary[aName] = pName;
2275 if (pThisDictionary)
2277 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2278 pThisDictionary->SetKeyValueLength(aName, pName->GetLocation()
2279 + PDFNameElement::GetLength()
2280 - nNameOffset);
2282 aName.clear();
2285 continue;
2288 auto pArr = dynamic_cast<PDFArrayElement*>(rElements[i].get());
2289 if (pArr)
2291 bArrayFound = true;
2292 pArray = pArr;
2293 continue;
2296 auto pEndArr = dynamic_cast<PDFEndArrayElement*>(rElements[i].get());
2297 if (pArray && pEndArr)
2299 for (auto& pNumber : aNumbers)
2300 pArray->PushBack(pNumber);
2301 aNumbers.clear();
2302 rDictionary[aName] = pArray;
2303 if (pThisDictionary)
2305 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2306 // Include the ending ']' in the length of the key - (array)value pair length.
2307 pThisDictionary->SetKeyValueLength(aName, pEndArr->GetOffset() - nNameOffset + 1);
2309 aName.clear();
2310 pArray = nullptr;
2311 continue;
2314 auto pReference = dynamic_cast<PDFReferenceElement*>(rElements[i].get());
2315 if (pReference)
2317 if (!pArray)
2319 rDictionary[aName] = pReference;
2320 if (pThisDictionary)
2322 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2323 pThisDictionary->SetKeyValueLength(aName,
2324 pReference->GetOffset() - nNameOffset);
2326 aName.clear();
2328 else
2330 if (bDictionaryFound)
2331 // Array inside dictionary.
2332 pArray->PushBack(pReference);
2334 aNumbers.clear();
2335 continue;
2338 auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(rElements[i].get());
2339 if (pLiteralString)
2341 rDictionary[aName] = pLiteralString;
2342 if (pThisDictionary)
2343 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2344 aName.clear();
2345 continue;
2348 auto pBoolean = dynamic_cast<PDFBooleanElement*>(rElements[i].get());
2349 if (pBoolean)
2351 rDictionary[aName] = pBoolean;
2352 if (pThisDictionary)
2353 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2354 aName.clear();
2355 continue;
2358 auto pHexString = dynamic_cast<PDFHexStringElement*>(rElements[i].get());
2359 if (pHexString)
2361 if (!pArray)
2363 rDictionary[aName] = pHexString;
2364 if (pThisDictionary)
2365 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2366 aName.clear();
2368 else
2370 pArray->PushBack(pHexString);
2372 continue;
2375 if (dynamic_cast<PDFEndObjectElement*>(rElements[i].get()))
2376 break;
2378 // Just remember this, so that in case it's not a reference parameter,
2379 // we can handle it later.
2380 auto pNumber = dynamic_cast<PDFNumberElement*>(rElements[i].get());
2381 if (pNumber)
2382 aNumbers.push_back(pNumber);
2385 if (!aNumbers.empty())
2387 rDictionary[aName] = aNumbers.back();
2388 if (pThisDictionary)
2389 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2390 aName.clear();
2391 aNumbers.clear();
2394 pThis->setParsing(false);
2396 return nRet;
2399 PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2400 const OString& rKey)
2402 auto it = rDictionary.find(rKey);
2403 if (it == rDictionary.end())
2404 return nullptr;
2406 return it->second;
2409 PDFObjectElement* PDFDictionaryElement::LookupObject(const OString& rDictionaryKey)
2411 auto pKey = dynamic_cast<PDFReferenceElement*>(
2412 PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2413 if (!pKey)
2415 SAL_WARN("vcl.filter",
2416 "PDFDictionaryElement::LookupObject: no such key with reference value: "
2417 << rDictionaryKey);
2418 return nullptr;
2421 return pKey->LookupObject();
2424 PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
2426 return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2429 PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2431 if (m_aDictionary.empty())
2433 if (!m_aElements.empty())
2434 // This is a stored object in an object stream.
2435 PDFDictionaryElement::Parse(m_aElements, this, m_aDictionary);
2436 else
2437 // Normal object: elements are stored as members of the document itself.
2438 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2441 return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2444 PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
2446 auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2447 if (!pKey)
2449 SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2450 << rDictionaryKey);
2451 return nullptr;
2454 return pKey->LookupObject();
2457 double PDFObjectElement::GetObjectValue() const { return m_fObjectValue; }
2459 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2461 m_nDictionaryOffset = nDictionaryOffset;
2464 sal_uInt64 PDFObjectElement::GetDictionaryOffset()
2466 if (m_aDictionary.empty())
2467 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2469 return m_nDictionaryOffset;
2472 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2474 sal_uInt64 PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset; }
2476 void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2478 m_aDictionaryKeyOffset[rKey] = nOffset;
2481 void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2483 m_aDictionaryKeyValueLength[rKey] = nLength;
2486 sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2488 auto it = m_aDictionaryKeyOffset.find(rKey);
2489 if (it == m_aDictionaryKeyOffset.end())
2490 return 0;
2492 return it->second;
2495 sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2497 auto it = m_aDictionaryKeyValueLength.find(rKey);
2498 if (it == m_aDictionaryKeyValueLength.end())
2499 return 0;
2501 return it->second;
2504 const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2506 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2508 m_nDictionaryLength = nDictionaryLength;
2511 sal_uInt64 PDFObjectElement::GetDictionaryLength()
2513 if (m_aDictionary.empty())
2514 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2516 return m_nDictionaryLength;
2519 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2521 sal_uInt64 PDFObjectElement::GetArrayLength() const { return m_nArrayLength; }
2523 PDFDictionaryElement* PDFObjectElement::GetDictionary()
2525 if (m_aDictionary.empty())
2526 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2527 return m_pDictionaryElement;
2530 void PDFObjectElement::SetDictionary(PDFDictionaryElement* pDictionaryElement)
2532 m_pDictionaryElement = pDictionaryElement;
2535 void PDFObjectElement::SetNumberElement(PDFNumberElement* pNumberElement)
2537 m_pNumberElement = pNumberElement;
2540 PDFNumberElement* PDFObjectElement::GetNumberElement() const { return m_pNumberElement; }
2542 const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2544 return m_aDictionaryReferences;
2547 void PDFObjectElement::AddDictionaryReference(PDFReferenceElement* pReference)
2549 m_aDictionaryReferences.push_back(pReference);
2552 const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2554 if (m_aDictionary.empty())
2555 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2557 return m_aDictionary;
2560 void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2562 void PDFObjectElement::SetStream(PDFStreamElement* pStreamElement)
2564 m_pStreamElement = pStreamElement;
2567 PDFStreamElement* PDFObjectElement::GetStream() const { return m_pStreamElement; }
2569 PDFArrayElement* PDFObjectElement::GetArray() const { return m_pArrayElement; }
2571 void PDFObjectElement::ParseStoredObjects()
2573 if (!m_pStreamElement)
2575 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2576 return;
2579 auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2580 if (!pType || pType->GetValue() != "ObjStm")
2582 if (!pType)
2583 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2584 else
2585 SAL_WARN("vcl.filter",
2586 "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2587 return;
2590 auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2591 if (!pFilter || pFilter->GetValue() != "FlateDecode")
2593 if (!pFilter)
2594 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2595 else
2596 SAL_WARN("vcl.filter",
2597 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2598 return;
2601 auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2602 if (!pFirst)
2604 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2605 return;
2608 auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2609 if (!pN)
2611 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2612 return;
2614 size_t nN = pN->GetValue();
2616 auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2617 if (!pLength)
2619 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2620 return;
2622 size_t nLength = pLength->GetValue();
2624 // Read and decompress it.
2625 SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2626 rEditBuffer.Seek(m_pStreamElement->GetOffset());
2627 std::vector<char> aBuf(nLength);
2628 rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2629 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2630 SvMemoryStream aStream;
2631 ZCodec aZCodec;
2632 aZCodec.BeginCompression();
2633 aZCodec.Decompress(aSource, aStream);
2634 if (!aZCodec.EndCompression())
2636 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2637 return;
2640 nLength = aStream.TellEnd();
2641 aStream.Seek(0);
2642 std::vector<size_t> aObjNums;
2643 std::vector<size_t> aOffsets;
2644 std::vector<size_t> aLengths;
2645 // First iterate over and find out the lengths.
2646 for (size_t nObject = 0; nObject < nN; ++nObject)
2648 PDFNumberElement aObjNum;
2649 if (!aObjNum.Read(aStream))
2651 SAL_WARN("vcl.filter",
2652 "PDFObjectElement::ParseStoredObjects: failed to read object number");
2653 return;
2655 aObjNums.push_back(aObjNum.GetValue());
2657 PDFDocument::SkipWhitespace(aStream);
2659 PDFNumberElement aByteOffset;
2660 if (!aByteOffset.Read(aStream))
2662 SAL_WARN("vcl.filter",
2663 "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2664 return;
2666 aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2668 if (aOffsets.size() > 1)
2669 aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2670 if (nObject + 1 == nN)
2671 aLengths.push_back(nLength - aOffsets.back());
2673 PDFDocument::SkipWhitespace(aStream);
2676 // Now create streams with the proper length and tokenize the data.
2677 for (size_t nObject = 0; nObject < nN; ++nObject)
2679 size_t nObjNum = aObjNums[nObject];
2680 size_t nOffset = aOffsets[nObject];
2681 size_t nLen = aLengths[nObject];
2683 aStream.Seek(nOffset);
2684 m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2685 PDFObjectElement* pStored = m_aStoredElements.back().get();
2687 aBuf.clear();
2688 aBuf.resize(nLen);
2689 aStream.ReadBytes(aBuf.data(), aBuf.size());
2690 SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2692 m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(),
2693 pStored);
2694 // This is how references know the object is stored inside this object stream.
2695 m_rDoc.SetIDObject(nObjNum, pStored);
2697 // Store the stream of the object in the object stream for later use.
2698 std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2699 aStoredStream.Seek(0);
2700 pStreamBuffer->WriteStream(aStoredStream);
2701 pStored->SetStreamBuffer(pStreamBuffer);
2705 std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2707 return m_aElements;
2710 SvMemoryStream* PDFObjectElement::GetStreamBuffer() const { return m_pStreamBuffer.get(); }
2712 void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2714 m_pStreamBuffer = std::move(pStreamBuffer);
2717 PDFDocument& PDFObjectElement::GetDocument() { return m_rDoc; }
2719 PDFReferenceElement::PDFReferenceElement(PDFDocument& rDoc, PDFNumberElement& rObject,
2720 PDFNumberElement const& rGeneration)
2721 : m_rDoc(rDoc)
2722 , m_fObjectValue(rObject.GetValue())
2723 , m_fGenerationValue(rGeneration.GetValue())
2724 , m_rObject(rObject)
2728 PDFNumberElement& PDFReferenceElement::GetObjectElement() const { return m_rObject; }
2730 bool PDFReferenceElement::Read(SvStream& rStream)
2732 SAL_INFO("vcl.filter",
2733 "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2734 m_nOffset = rStream.Tell();
2735 return true;
2738 sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2740 double PDFReferenceElement::LookupNumber(SvStream& rStream) const
2742 size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2743 if (nOffset == 0)
2745 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2746 << m_fObjectValue);
2747 return 0;
2750 sal_uInt64 nOrigPos = rStream.Tell();
2751 comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2753 rStream.Seek(nOffset);
2755 PDFDocument::SkipWhitespace(rStream);
2756 PDFNumberElement aNumber;
2757 bool bRet = aNumber.Read(rStream);
2758 if (!bRet || aNumber.GetValue() != m_fObjectValue)
2760 SAL_WARN("vcl.filter",
2761 "PDFReferenceElement::LookupNumber: offset points to not matching object");
2762 return 0;
2767 PDFDocument::SkipWhitespace(rStream);
2768 PDFNumberElement aNumber;
2769 bool bRet = aNumber.Read(rStream);
2770 if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2772 SAL_WARN("vcl.filter",
2773 "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2774 return 0;
2779 PDFDocument::SkipWhitespace(rStream);
2780 OString aKeyword = PDFDocument::ReadKeyword(rStream);
2781 if (aKeyword != "obj")
2783 SAL_WARN("vcl.filter",
2784 "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2785 return 0;
2789 PDFDocument::SkipWhitespace(rStream);
2790 PDFNumberElement aNumber;
2791 if (!aNumber.Read(rStream))
2793 SAL_WARN("vcl.filter",
2794 "PDFReferenceElement::LookupNumber: failed to read referenced number");
2795 return 0;
2798 return aNumber.GetValue();
2801 PDFObjectElement* PDFReferenceElement::LookupObject()
2803 return m_rDoc.LookupObject(m_fObjectValue);
2806 PDFObjectElement* PDFDocument::LookupObject(size_t nObjectNumber)
2808 auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2810 if (itIDObjects != m_aIDObjects.end())
2811 return itIDObjects->second;
2813 SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2814 return nullptr;
2817 SvMemoryStream& PDFDocument::GetEditBuffer() { return m_aEditBuffer; }
2819 int PDFReferenceElement::GetObjectValue() const { return m_fObjectValue; }
2821 int PDFReferenceElement::GetGenerationValue() const { return m_fGenerationValue; }
2823 bool PDFDictionaryElement::Read(SvStream& rStream)
2825 char ch;
2826 rStream.ReadChar(ch);
2827 if (ch != '<')
2829 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2830 return false;
2833 if (rStream.eof())
2835 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2836 return false;
2839 rStream.ReadChar(ch);
2840 if (ch != '<')
2842 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2843 return false;
2846 m_nLocation = rStream.Tell();
2848 SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2850 return true;
2853 PDFEndDictionaryElement::PDFEndDictionaryElement() = default;
2855 sal_uInt64 PDFEndDictionaryElement::GetLocation() const { return m_nLocation; }
2857 bool PDFEndDictionaryElement::Read(SvStream& rStream)
2859 m_nLocation = rStream.Tell();
2860 char ch;
2861 rStream.ReadChar(ch);
2862 if (ch != '>')
2864 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2865 return false;
2868 if (rStream.eof())
2870 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2871 return false;
2874 rStream.ReadChar(ch);
2875 if (ch != '>')
2877 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2878 return false;
2881 SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2883 return true;
2886 PDFNameElement::PDFNameElement() = default;
2888 bool PDFNameElement::Read(SvStream& rStream)
2890 char ch;
2891 rStream.ReadChar(ch);
2892 if (ch != '/')
2894 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2895 return false;
2897 m_nLocation = rStream.Tell();
2899 if (rStream.eof())
2901 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2902 return false;
2905 // Read till the first white-space.
2906 OStringBuffer aBuf;
2907 rStream.ReadChar(ch);
2908 while (!rStream.eof())
2910 if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
2911 || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2913 rStream.SeekRel(-1);
2914 m_aValue = aBuf.makeStringAndClear();
2915 SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2916 return true;
2918 aBuf.append(ch);
2919 rStream.ReadChar(ch);
2922 return false;
2925 const OString& PDFNameElement::GetValue() const { return m_aValue; }
2927 sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
2929 PDFStreamElement::PDFStreamElement(size_t nLength)
2930 : m_nLength(nLength)
2931 , m_nOffset(0)
2935 bool PDFStreamElement::Read(SvStream& rStream)
2937 SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
2938 m_nOffset = rStream.Tell();
2939 std::vector<unsigned char> aBytes(m_nLength);
2940 rStream.ReadBytes(aBytes.data(), aBytes.size());
2941 m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
2943 return rStream.good();
2946 SvMemoryStream& PDFStreamElement::GetMemory() { return m_aMemory; }
2948 sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
2950 bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
2952 bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
2954 PDFArrayElement::PDFArrayElement(PDFObjectElement* pObject)
2955 : m_pObject(pObject)
2959 bool PDFArrayElement::Read(SvStream& rStream)
2961 char ch;
2962 rStream.ReadChar(ch);
2963 if (ch != '[')
2965 SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
2966 return false;
2969 SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2971 return true;
2974 void PDFArrayElement::PushBack(PDFElement* pElement)
2976 if (m_pObject)
2977 SAL_INFO("vcl.filter",
2978 "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
2979 m_aElements.push_back(pElement);
2982 const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
2984 PDFEndArrayElement::PDFEndArrayElement() = default;
2986 bool PDFEndArrayElement::Read(SvStream& rStream)
2988 m_nOffset = rStream.Tell();
2989 char ch;
2990 rStream.ReadChar(ch);
2991 if (ch != ']')
2993 SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
2994 return false;
2997 SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
2999 return true;
3002 sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
3004 } // namespace filter
3005 } // namespace vcl
3007 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */