build fix: no comphelper/profilezone.hxx in this branch
[LibreOffice.git] / vcl / source / filter / ipdf / pdfdocument.cxx
blob5661e605bee56556201c4e90ffcdf705db31fed3
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <vcl/filter/pdfdocument.hxx>
12 #include <map>
13 #include <memory>
14 #include <vector>
16 #include <com/sun/star/uno/Sequence.hxx>
18 #include <comphelper/processfactory.hxx>
19 #include <comphelper/scopeguard.hxx>
20 #include <comphelper/string.hxx>
21 #include <filter/msfilter/mscodec.hxx>
22 #include <rtl/strbuf.hxx>
23 #include <rtl/string.hxx>
24 #include <sal/log.hxx>
25 #include <sal/types.h>
26 #include <sax/tools/converter.hxx>
27 #include <tools/zcodec.hxx>
28 #include <unotools/calendarwrapper.hxx>
29 #include <unotools/datetime.hxx>
30 #include <vcl/pdfwriter.hxx>
31 #include <xmloff/xmluconv.hxx>
32 #include <o3tl/make_unique.hxx>
34 using namespace com::sun::star;
36 namespace vcl
38 namespace filter
41 const int MAX_SIGNATURE_CONTENT_LENGTH = 50000;
43 class PDFTrailerElement;
45 /// A one-liner comment.
46 class PDFCommentElement : public PDFElement
48 PDFDocument& m_rDoc;
49 OString m_aComment;
51 public:
52 explicit PDFCommentElement(PDFDocument& rDoc);
53 bool Read(SvStream& rStream) override;
56 class PDFReferenceElement;
58 /// End of a dictionary: '>>'.
59 class PDFEndDictionaryElement : public PDFElement
61 /// Offset before the '>>' token.
62 sal_uInt64 m_nLocation = 0;
63 public:
64 PDFEndDictionaryElement();
65 bool Read(SvStream& rStream) override;
66 sal_uInt64 GetLocation() const;
69 /// End of a stream: 'endstream' keyword.
70 class PDFEndStreamElement : public PDFElement
72 public:
73 bool Read(SvStream& rStream) override;
76 /// End of a object: 'endobj' keyword.
77 class PDFEndObjectElement : public PDFElement
79 public:
80 bool Read(SvStream& rStream) override;
83 /// End of an array: ']'.
84 class PDFEndArrayElement : public PDFElement
86 /// Location before the ']' token.
87 sal_uInt64 m_nOffset = 0;
88 public:
89 PDFEndArrayElement();
90 bool Read(SvStream& rStream) override;
91 sal_uInt64 GetOffset() const;
94 /// Boolean object: a 'true' or a 'false'.
95 class PDFBooleanElement : public PDFElement
97 public:
98 explicit PDFBooleanElement(bool bValue);
99 bool Read(SvStream& rStream) override;
102 /// Null object: the 'null' singleton.
103 class PDFNullElement : public PDFElement
105 public:
106 bool Read(SvStream& rStream) override;
109 /// The trailer singleton is at the end of the doc.
110 class PDFTrailerElement : public PDFElement
112 PDFDocument& m_rDoc;
113 std::map<OString, PDFElement*> m_aDictionary;
114 /// Location of the end of the trailer token.
115 sal_uInt64 m_nOffset = 0;
117 public:
118 explicit PDFTrailerElement(PDFDocument& rDoc);
119 bool Read(SvStream& rStream) override;
120 PDFElement* Lookup(const OString& rDictionaryKey);
121 sal_uInt64 GetLocation() const;
124 XRefEntry::XRefEntry()
125 : m_eType(XRefEntryType::NOT_COMPRESSED),
126 m_nOffset(0),
127 m_nGenerationNumber(0),
128 m_bDirty(false)
132 PDFDocument::PDFDocument()
133 : m_pTrailer(nullptr),
134 m_pXRefStream(nullptr)
138 bool PDFDocument::RemoveSignature(size_t nPosition)
140 std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
141 if (nPosition >= aSignatures.size())
143 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
144 return false;
147 if (aSignatures.size() != m_aEOFs.size() - 1)
149 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures and incremental updates");
150 return false;
153 // The EOF offset is the end of the original file, without the signature at
154 // nPosition.
155 m_aEditBuffer.Seek(m_aEOFs[nPosition]);
156 // Drop all bytes after the current position.
157 m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1);
159 return m_aEditBuffer.good();
162 sal_uInt32 PDFDocument::GetNextSignature()
164 sal_uInt32 nRet = 0;
165 for (const auto& pSignature : GetSignatureWidgets())
167 auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
168 if (!pT)
169 continue;
171 const OString& rValue = pT->GetValue();
172 const OString aPrefix = "Signature";
173 if (!rValue.startsWith(aPrefix))
174 continue;
176 nRet = std::max(nRet, rValue.copy(aPrefix.getLength()).toUInt32());
179 return nRet + 1;
182 sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES, sal_uInt64& rLastByteRangeOffset, sal_Int64& rContentOffset)
184 // Write signature object.
185 sal_Int32 nSignatureId = m_aXRef.size();
186 XRefEntry aSignatureEntry;
187 aSignatureEntry.m_nOffset = m_aEditBuffer.Tell();
188 aSignatureEntry.m_bDirty = true;
189 m_aXRef[nSignatureId] = aSignatureEntry;
190 OStringBuffer aSigBuffer;
191 aSigBuffer.append(nSignatureId);
192 aSigBuffer.append(" 0 obj\n");
193 aSigBuffer.append("<</Contents <");
194 rContentOffset = aSignatureEntry.m_nOffset + aSigBuffer.getLength();
195 // Reserve space for the PKCS#7 object.
196 OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
197 comphelper::string::padToLength(aContentFiller, MAX_SIGNATURE_CONTENT_LENGTH, '0');
198 aSigBuffer.append(aContentFiller.makeStringAndClear());
199 aSigBuffer.append(">\n/Type/Sig/SubFilter");
200 if (bAdES)
201 aSigBuffer.append("/ETSI.CAdES.detached");
202 else
203 aSigBuffer.append("/adbe.pkcs7.detached");
205 // Time of signing.
206 aSigBuffer.append(" /M (");
207 aSigBuffer.append(vcl::PDFWriter::GetDateTime());
208 aSigBuffer.append(")");
210 // Byte range: we can write offset1-length1 and offset2 right now, will
211 // write length2 later.
212 aSigBuffer.append(" /ByteRange [ 0 ");
213 // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
214 aSigBuffer.append(rContentOffset - 1);
215 aSigBuffer.append(" ");
216 aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
217 aSigBuffer.append(" ");
218 rLastByteRangeOffset = aSignatureEntry.m_nOffset + aSigBuffer.getLength();
219 // We don't know how many bytes we need for the last ByteRange value, this
220 // should be enough.
221 OStringBuffer aByteRangeFiller;
222 comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
223 aSigBuffer.append(aByteRangeFiller.makeStringAndClear());
224 // Finish the Sig obj.
225 aSigBuffer.append(" /Filter/Adobe.PPKMS");
227 if (!rDescription.isEmpty())
229 aSigBuffer.append("/Reason<");
230 vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
231 aSigBuffer.append(">");
234 aSigBuffer.append(" >>\nendobj\n\n");
235 m_aEditBuffer.WriteOString(aSigBuffer.toString());
237 return nSignatureId;
240 sal_Int32 PDFDocument::WriteAppearanceObject()
242 // Write appearance object.
243 sal_Int32 nAppearanceId = m_aXRef.size();
244 XRefEntry aAppearanceEntry;
245 aAppearanceEntry.m_nOffset = m_aEditBuffer.Tell();
246 aAppearanceEntry.m_bDirty = true;
247 m_aXRef[nAppearanceId] = aAppearanceEntry;
248 m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
249 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
250 m_aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
251 m_aEditBuffer.WriteCharPtr("/BBox[0 0 0 0]\n/Length 0\n>>\n");
252 m_aEditBuffer.WriteCharPtr("stream\n\nendstream\nendobj\n\n");
254 return nAppearanceId;
257 sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement& rFirstPage, sal_Int32 nSignatureId, sal_Int32 nAppearanceId)
259 // Decide what identifier to use for the new signature.
260 sal_uInt32 nNextSignature = GetNextSignature();
262 // Write the Annot object, references nSignatureId and nAppearanceId.
263 sal_Int32 nAnnotId = m_aXRef.size();
264 XRefEntry aAnnotEntry;
265 aAnnotEntry.m_nOffset = m_aEditBuffer.Tell();
266 aAnnotEntry.m_bDirty = true;
267 m_aXRef[nAnnotId] = aAnnotEntry;
268 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
269 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
270 m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
271 m_aEditBuffer.WriteCharPtr("/Rect[0 0 0 0]\n");
272 m_aEditBuffer.WriteCharPtr("/FT/Sig\n");
273 m_aEditBuffer.WriteCharPtr("/P ");
274 m_aEditBuffer.WriteUInt32AsString(rFirstPage.GetObjectValue());
275 m_aEditBuffer.WriteCharPtr(" 0 R\n");
276 m_aEditBuffer.WriteCharPtr("/T(Signature");
277 m_aEditBuffer.WriteUInt32AsString(nNextSignature);
278 m_aEditBuffer.WriteCharPtr(")\n");
279 m_aEditBuffer.WriteCharPtr("/V ");
280 m_aEditBuffer.WriteUInt32AsString(nSignatureId);
281 m_aEditBuffer.WriteCharPtr(" 0 R\n");
282 m_aEditBuffer.WriteCharPtr("/DV ");
283 m_aEditBuffer.WriteUInt32AsString(nSignatureId);
284 m_aEditBuffer.WriteCharPtr(" 0 R\n");
285 m_aEditBuffer.WriteCharPtr("/AP<<\n/N ");
286 m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
287 m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n");
288 m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
290 return nAnnotId;
293 bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
295 PDFElement* pAnnots = rFirstPage.Lookup("Annots");
296 auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
297 if (pAnnotsReference)
299 // Write the updated Annots key of the Page object.
300 PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
301 if (!pAnnotsObject)
303 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
304 return false;
307 sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
308 m_aXRef[nAnnotsId].m_eType = XRefEntryType::NOT_COMPRESSED;
309 m_aXRef[nAnnotsId].m_nOffset = m_aEditBuffer.Tell();
310 m_aXRef[nAnnotsId].m_nGenerationNumber = 0;
311 m_aXRef[nAnnotsId].m_bDirty = true;
312 m_aEditBuffer.WriteUInt32AsString(nAnnotsId);
313 m_aEditBuffer.WriteCharPtr(" 0 obj\n[");
315 // Write existing references.
316 PDFArrayElement* pArray = pAnnotsObject->GetArray();
317 if (!pArray)
319 SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
320 return false;
323 for (size_t i = 0; i < pArray->GetElements().size(); ++i)
325 auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
326 if (!pReference)
327 continue;
329 if (i)
330 m_aEditBuffer.WriteCharPtr(" ");
331 m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue());
332 m_aEditBuffer.WriteCharPtr(" 0 R");
334 // Write our reference.
335 m_aEditBuffer.WriteCharPtr(" ");
336 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
337 m_aEditBuffer.WriteCharPtr(" 0 R");
339 m_aEditBuffer.WriteCharPtr("]\nendobj\n\n");
341 else
343 // Write the updated first page object, references nAnnotId.
344 sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
345 if (nFirstPageId >= m_aXRef.size())
347 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
348 return false;
350 m_aXRef[nFirstPageId].m_nOffset = m_aEditBuffer.Tell();
351 m_aXRef[nFirstPageId].m_bDirty = true;
352 m_aEditBuffer.WriteUInt32AsString(nFirstPageId);
353 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
354 m_aEditBuffer.WriteCharPtr("<<");
355 auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
356 if (!pAnnotsArray)
358 // No Annots key, just write the key with a single reference.
359 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + rFirstPage.GetDictionaryOffset(), rFirstPage.GetDictionaryLength());
360 m_aEditBuffer.WriteCharPtr("/Annots[");
361 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
362 m_aEditBuffer.WriteCharPtr(" 0 R]");
364 else
366 // Annots key is already there, insert our reference at the end.
367 PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
369 // Offset right before the end of the Annots array.
370 sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots") + pDictionary->GetKeyValueLength("Annots") - 1;
371 // Length of beginning of the dictionary -> Annots end.
372 sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
373 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + rFirstPage.GetDictionaryOffset(), nAnnotsBeforeEndLength);
374 m_aEditBuffer.WriteCharPtr(" ");
375 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
376 m_aEditBuffer.WriteCharPtr(" 0 R");
377 // Length of Annots end -> end of the dictionary.
378 sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset() + rFirstPage.GetDictionaryLength() - nAnnotsEndOffset;
379 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + nAnnotsEndOffset, nAnnotsAfterEndLength);
381 m_aEditBuffer.WriteCharPtr(">>");
382 m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
385 return true;
388 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot)
390 if (m_pXRefStream)
391 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
392 else
394 if (!m_pTrailer)
396 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
397 return false;
399 pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
401 if (!pRoot)
403 SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
404 return false;
406 PDFObjectElement* pCatalog = pRoot->LookupObject();
407 if (!pCatalog)
409 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
410 return false;
412 sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
413 if (nCatalogId >= m_aXRef.size())
415 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
416 return false;
418 PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
419 auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
420 if (pAcroFormReference)
422 // Write the updated AcroForm key of the Catalog object.
423 PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
424 if (!pAcroFormObject)
426 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
427 return false;
430 sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
431 m_aXRef[nAcroFormId].m_eType = XRefEntryType::NOT_COMPRESSED;
432 m_aXRef[nAcroFormId].m_nOffset = m_aEditBuffer.Tell();
433 m_aXRef[nAcroFormId].m_nGenerationNumber = 0;
434 m_aXRef[nAcroFormId].m_bDirty = true;
435 m_aEditBuffer.WriteUInt32AsString(nAcroFormId);
436 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
438 // If this is nullptr, then the AcroForm object is not in an object stream.
439 SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
441 if (!pAcroFormObject->Lookup("Fields"))
443 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object without required Fields key");
444 return false;
447 PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
448 if (!pAcroFormDictionary)
450 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
451 return false;
454 // Offset right before the end of the Fields array.
455 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields") + pAcroFormDictionary->GetKeyValueLength("Fields") - strlen("]");
456 // Length of beginning of the object dictionary -> Fields end.
457 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
458 if (pStreamBuffer)
459 m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
460 else
462 nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
463 m_aEditBuffer.WriteCharPtr("<<");
464 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + pAcroFormObject->GetDictionaryOffset(), nFieldsBeforeEndLength);
467 // Append our reference at the end of the Fields array.
468 m_aEditBuffer.WriteCharPtr(" ");
469 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
470 m_aEditBuffer.WriteCharPtr(" 0 R");
472 // Length of Fields end -> end of the object dictionary.
473 if (pStreamBuffer)
475 sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
476 m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData()) + nFieldsEndOffset, nFieldsAfterEndLength);
478 else
480 sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset() + pAcroFormObject->GetDictionaryLength() - nFieldsEndOffset;
481 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + nFieldsEndOffset, nFieldsAfterEndLength);
482 m_aEditBuffer.WriteCharPtr(">>");
485 m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
487 else
489 // Write the updated Catalog object, references nAnnotId.
490 auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
491 m_aXRef[nCatalogId].m_nOffset = m_aEditBuffer.Tell();
492 m_aXRef[nCatalogId].m_bDirty = true;
493 m_aEditBuffer.WriteUInt32AsString(nCatalogId);
494 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
495 m_aEditBuffer.WriteCharPtr("<<");
496 if (!pAcroFormDictionary)
498 // No AcroForm key, assume no signatures.
499 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + pCatalog->GetDictionaryOffset(), pCatalog->GetDictionaryLength());
500 m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n");
501 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
502 m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
504 else
506 // AcroForm key is already there, insert our reference at the Fields end.
507 auto it = pAcroFormDictionary->GetItems().find("Fields");
508 if (it == pAcroFormDictionary->GetItems().end())
510 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
511 return false;
514 auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
515 if (!pFields)
517 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
518 return false;
521 // Offset right before the end of the Fields array.
522 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields") + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
523 // Length of beginning of the Catalog dictionary -> Fields end.
524 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
525 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + pCatalog->GetDictionaryOffset(), nFieldsBeforeEndLength);
526 m_aEditBuffer.WriteCharPtr(" ");
527 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
528 m_aEditBuffer.WriteCharPtr(" 0 R");
529 // Length of Fields end -> end of the Catalog dictionary.
530 sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset() + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
531 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData()) + nFieldsEndOffset, nFieldsAfterEndLength);
533 m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
536 return true;
539 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement* pRoot)
541 if (m_pXRefStream)
543 // Write the xref stream.
544 // This is a bit meta: the xref stream stores its own offset.
545 sal_Int32 nXRefStreamId = m_aXRef.size();
546 XRefEntry aXRefStreamEntry;
547 aXRefStreamEntry.m_nOffset = nXRefOffset;
548 aXRefStreamEntry.m_bDirty = true;
549 m_aXRef[nXRefStreamId] = aXRefStreamEntry;
551 // Write stream data.
552 SvMemoryStream aXRefStream;
553 const size_t nOffsetLen = 3;
554 // 3 additional bytes: predictor, the first and the third field.
555 const size_t nLineLength = nOffsetLen + 3;
556 // This is the line as it appears before tweaking according to the predictor.
557 std::vector<unsigned char> aOrigLine(nLineLength);
558 // This is the previous line.
559 std::vector<unsigned char> aPrevLine(nLineLength);
560 // This is the line as written to the stream.
561 std::vector<unsigned char> aFilteredLine(nLineLength);
562 for (const auto& rXRef : m_aXRef)
564 const XRefEntry& rEntry = rXRef.second;
566 if (!rEntry.m_bDirty)
567 continue;
569 // Predictor.
570 size_t nPos = 0;
571 // PNG prediction: up (on all rows).
572 aOrigLine[nPos++] = 2;
574 // First field.
575 unsigned char nType = 0;
576 switch (rEntry.m_eType)
578 case XRefEntryType::FREE:
579 nType = 0;
580 break;
581 case XRefEntryType::NOT_COMPRESSED:
582 nType = 1;
583 break;
584 case XRefEntryType::COMPRESSED:
585 nType = 2;
586 break;
588 aOrigLine[nPos++] = nType;
590 // Second field.
591 for (size_t i = 0; i < nOffsetLen; ++i)
593 size_t nByte = nOffsetLen - i - 1;
594 // Fields requiring more than one byte are stored with the
595 // high-order byte first.
596 unsigned char nCh = (rEntry.m_nOffset & (0xff << (nByte * 8))) >> (nByte * 8);
597 aOrigLine[nPos++] = nCh;
600 // Third field.
601 aOrigLine[nPos++] = 0;
603 // Now apply the predictor.
604 aFilteredLine[0] = aOrigLine[0];
605 for (size_t i = 1; i < nLineLength; ++i)
607 // Count the delta vs the previous line.
608 aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
609 // Remember the new reference.
610 aPrevLine[i] = aOrigLine[i];
613 aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
616 m_aEditBuffer.WriteUInt32AsString(nXRefStreamId);
617 m_aEditBuffer.WriteCharPtr(" 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
619 // ID.
620 auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
621 if (pID)
623 const std::vector<PDFElement*>& rElements = pID->GetElements();
624 m_aEditBuffer.WriteCharPtr("/ID [ <");
625 for (size_t i = 0; i < rElements.size(); ++i)
627 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
628 if (!pIDString)
629 continue;
631 m_aEditBuffer.WriteOString(pIDString->GetValue());
632 if ((i + 1) < rElements.size())
633 m_aEditBuffer.WriteCharPtr("> <");
635 m_aEditBuffer.WriteCharPtr("> ] ");
638 // Index.
639 m_aEditBuffer.WriteCharPtr("/Index [ ");
640 for (const auto& rXRef : m_aXRef)
642 if (!rXRef.second.m_bDirty)
643 continue;
645 m_aEditBuffer.WriteUInt32AsString(rXRef.first);
646 m_aEditBuffer.WriteCharPtr(" 1 ");
648 m_aEditBuffer.WriteCharPtr("] ");
650 // Info.
651 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
652 if (pInfo)
654 m_aEditBuffer.WriteCharPtr("/Info ");
655 m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
656 m_aEditBuffer.WriteCharPtr(" ");
657 m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
658 m_aEditBuffer.WriteCharPtr(" R ");
661 // Length.
662 m_aEditBuffer.WriteCharPtr("/Length ");
664 ZCodec aZCodec;
665 aZCodec.BeginCompression();
666 aXRefStream.Seek(0);
667 SvMemoryStream aStream;
668 aZCodec.Compress(aXRefStream, aStream);
669 aZCodec.EndCompression();
670 aXRefStream.Seek(0);
671 aXRefStream.SetStreamSize(0);
672 aStream.Seek(0);
673 aXRefStream.WriteStream(aStream);
675 m_aEditBuffer.WriteUInt32AsString(aXRefStream.GetSize());
677 if (!m_aStartXRefs.empty())
679 // Write location of the previous cross-reference section.
680 m_aEditBuffer.WriteCharPtr("/Prev ");
681 m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
684 // Root.
685 m_aEditBuffer.WriteCharPtr("/Root ");
686 m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
687 m_aEditBuffer.WriteCharPtr(" ");
688 m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
689 m_aEditBuffer.WriteCharPtr(" R ");
691 // Size.
692 m_aEditBuffer.WriteCharPtr("/Size ");
693 m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
695 m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
696 aXRefStream.Seek(0);
697 m_aEditBuffer.WriteStream(aXRefStream);
698 m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
700 else
702 // Write the xref table.
703 m_aEditBuffer.WriteCharPtr("xref\n");
704 for (const auto& rXRef : m_aXRef)
706 size_t nObject = rXRef.first;
707 size_t nOffset = rXRef.second.m_nOffset;
708 if (!rXRef.second.m_bDirty)
709 continue;
711 m_aEditBuffer.WriteUInt32AsString(nObject);
712 m_aEditBuffer.WriteCharPtr(" 1\n");
713 OStringBuffer aBuffer;
714 aBuffer.append(static_cast<sal_Int32>(nOffset));
715 while (aBuffer.getLength() < 10)
716 aBuffer.insert(0, "0");
717 if (nObject == 0)
718 aBuffer.append(" 65535 f \n");
719 else
720 aBuffer.append(" 00000 n \n");
721 m_aEditBuffer.WriteOString(aBuffer.toString());
724 // Write the trailer.
725 m_aEditBuffer.WriteCharPtr("trailer\n<</Size ");
726 m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
727 m_aEditBuffer.WriteCharPtr("/Root ");
728 m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
729 m_aEditBuffer.WriteCharPtr(" ");
730 m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
731 m_aEditBuffer.WriteCharPtr(" R\n");
732 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
733 if (pInfo)
735 m_aEditBuffer.WriteCharPtr("/Info ");
736 m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
737 m_aEditBuffer.WriteCharPtr(" ");
738 m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
739 m_aEditBuffer.WriteCharPtr(" R\n");
741 auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
742 if (pID)
744 const std::vector<PDFElement*>& rElements = pID->GetElements();
745 m_aEditBuffer.WriteCharPtr("/ID [ <");
746 for (size_t i = 0; i < rElements.size(); ++i)
748 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
749 if (!pIDString)
750 continue;
752 m_aEditBuffer.WriteOString(pIDString->GetValue());
753 if ((i + 1) < rElements.size())
754 m_aEditBuffer.WriteCharPtr(">\n<");
756 m_aEditBuffer.WriteCharPtr("> ]\n");
759 if (!m_aStartXRefs.empty())
761 // Write location of the previous cross-reference section.
762 m_aEditBuffer.WriteCharPtr("/Prev ");
763 m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
766 m_aEditBuffer.WriteCharPtr(">>\n");
770 bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate, const OUString& rDescription, bool bAdES)
772 m_aEditBuffer.Seek(STREAM_SEEK_TO_END);
773 m_aEditBuffer.WriteCharPtr("\n");
775 sal_uInt64 nSignatureLastByteRangeOffset = 0;
776 sal_Int64 nSignatureContentOffset = 0;
777 sal_Int32 nSignatureId = WriteSignatureObject(rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
779 sal_Int32 nAppearanceId = WriteAppearanceObject();
781 std::vector<PDFObjectElement*> aPages = GetPages();
782 if (aPages.empty() || !aPages[0])
784 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
785 return false;
788 PDFObjectElement& rFirstPage = *aPages[0];
789 sal_Int32 nAnnotId = WriteAnnotObject(rFirstPage, nSignatureId, nAppearanceId);
791 if (!WritePageObject(rFirstPage, nAnnotId))
793 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
794 return false;
797 PDFReferenceElement* pRoot = nullptr;
798 if (!WriteCatalogObject(nAnnotId, pRoot))
800 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
801 return false;
804 sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
805 WriteXRef(nXRefOffset, pRoot);
807 // Write startxref.
808 m_aEditBuffer.WriteCharPtr("startxref\n");
809 m_aEditBuffer.WriteUInt32AsString(nXRefOffset);
810 m_aEditBuffer.WriteCharPtr("\n%%EOF\n");
812 // Finalize the signature, now that we know the total file size.
813 // Calculate the length of the last byte range.
814 sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
815 sal_Int64 nLastByteRangeLength = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
816 // Write the length to the buffer.
817 m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
818 OStringBuffer aByteRangeBuffer;
819 aByteRangeBuffer.append(nLastByteRangeLength);
820 aByteRangeBuffer.append(" ]");
821 m_aEditBuffer.WriteOString(aByteRangeBuffer.toString());
823 // Create the PKCS#7 object.
824 css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
825 if (!aDerEncoded.hasElements())
827 SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
828 return false;
831 m_aEditBuffer.Seek(0);
832 sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
833 std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
834 m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
836 m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
837 sal_uInt64 nBufferSize2 = nLastByteRangeLength;
838 std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
839 m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
841 OStringBuffer aCMSHexBuffer;
842 vcl::PDFWriter::PDFSignContext aSignContext(aCMSHexBuffer);
843 aSignContext.m_pDerEncoded = aDerEncoded.getArray();
844 aSignContext.m_nDerEncoded = aDerEncoded.getLength();
845 aSignContext.m_pByteRange1 = aBuffer1.get();
846 aSignContext.m_nByteRange1 = nBufferSize1;
847 aSignContext.m_pByteRange2 = aBuffer2.get();
848 aSignContext.m_nByteRange2 = nBufferSize2;
849 if (!vcl::PDFWriter::Sign(aSignContext))
851 SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
852 return false;
855 assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
857 m_aEditBuffer.Seek(nSignatureContentOffset);
858 m_aEditBuffer.WriteOString(aCMSHexBuffer.toString());
860 return true;
863 bool PDFDocument::Write(SvStream& rStream)
865 m_aEditBuffer.Seek(0);
866 rStream.WriteStream(m_aEditBuffer);
867 return rStream.good();
870 bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< std::unique_ptr<PDFElement> >& rElements, PDFObjectElement* pObjectElement)
872 // Last seen object token.
873 PDFObjectElement* pObject = pObjectElement;
874 PDFNameElement* pObjectKey = nullptr;
875 PDFObjectElement* pObjectStream = nullptr;
876 bool bInXRef = false;
877 // The next number will be an xref offset.
878 bool bInStartXRef = false;
879 // Dictionary depth, so we know when we're outside any dictionaries.
880 int nDictionaryDepth = 0;
881 // Array depth, only the offset/length of the toplevel array is tracked.
882 int nArrayDepth = 0;
883 // Last seen array token that's outside any dictionaries.
884 PDFArrayElement* pArray = nullptr;
885 // If we're inside an obj/endobj pair.
886 bool bInObject = false;
887 while (true)
889 char ch;
890 rStream.ReadChar(ch);
891 if (rStream.IsEof())
892 break;
894 switch (ch)
896 case '%':
898 auto pComment = new PDFCommentElement(*this);
899 rElements.push_back(std::unique_ptr<PDFElement>(pComment));
900 rStream.SeekRel(-1);
901 if (!rElements.back()->Read(rStream))
903 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
904 return false;
906 if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty() && m_aEOFs.back() == rStream.Tell())
908 // Found EOF and partial parsing requested, we're done.
909 return true;
911 break;
913 case '<':
915 // Dictionary or hex string.
916 rStream.ReadChar(ch);
917 rStream.SeekRel(-2);
918 if (ch == '<')
920 rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
921 ++nDictionaryDepth;
923 else
924 rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
925 if (!rElements.back()->Read(rStream))
927 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
928 return false;
930 break;
932 case '>':
934 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
935 --nDictionaryDepth;
936 rStream.SeekRel(-1);
937 if (!rElements.back()->Read(rStream))
939 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
940 return false;
942 break;
944 case '[':
946 auto pArr = new PDFArrayElement(pObject);
947 rElements.push_back(std::unique_ptr<PDFElement>(pArr));
948 if (nDictionaryDepth == 0 && nArrayDepth == 0)
950 // The array is attached directly, inform the object.
951 pArray = pArr;
952 if (pObject)
954 pObject->SetArray(pArray);
955 pObject->SetArrayOffset(rStream.Tell());
958 ++nArrayDepth;
959 rStream.SeekRel(-1);
960 if (!rElements.back()->Read(rStream))
962 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
963 return false;
965 break;
967 case ']':
969 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
970 --nArrayDepth;
971 if (nArrayDepth == 0)
972 pArray = nullptr;
973 rStream.SeekRel(-1);
974 if (nDictionaryDepth == 0 && nArrayDepth == 0)
976 if (pObject)
978 pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
981 if (!rElements.back()->Read(rStream))
983 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
984 return false;
986 break;
988 case '/':
990 auto pNameElement = new PDFNameElement();
991 rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
992 rStream.SeekRel(-1);
993 if (!pNameElement->Read(rStream))
995 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
996 return false;
998 if (pObject && pObjectKey && pObjectKey->GetValue() == "Type" && pNameElement->GetValue() == "ObjStm")
999 pObjectStream = pObject;
1000 else
1001 pObjectKey = pNameElement;
1002 break;
1004 case '(':
1006 rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1007 rStream.SeekRel(-1);
1008 if (!rElements.back()->Read(rStream))
1010 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1011 return false;
1013 break;
1015 default:
1017 if (isdigit(ch) || ch == '-')
1019 // Numbering object: an integer or a real.
1020 auto pNumberElement = new PDFNumberElement();
1021 rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1022 rStream.SeekRel(-1);
1023 if (!pNumberElement->Read(rStream))
1025 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1026 return false;
1028 if (bInStartXRef)
1030 bInStartXRef = false;
1031 m_aStartXRefs.push_back(pNumberElement->GetValue());
1033 auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1034 if (it != m_aOffsetObjects.end())
1035 m_pXRefStream = it->second;
1037 else if (bInObject && !nDictionaryDepth && !nArrayDepth && pObject)
1038 // Number element inside an object, but outside a
1039 // dictionary / array: remember it.
1040 pObject->SetNumberElement(pNumberElement);
1042 else if (isalpha(ch))
1044 // Possible keyword, like "obj".
1045 rStream.SeekRel(-1);
1046 OString aKeyword = ReadKeyword(rStream);
1048 bool bObj = aKeyword == "obj";
1049 if (bObj || aKeyword == "R")
1051 size_t nElements = rElements.size();
1052 if (nElements < 2)
1054 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two tokens before 'obj' or 'R' keyword");
1055 return false;
1058 auto pObjectNumber = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1059 auto pGenerationNumber = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1060 if (!pObjectNumber || !pGenerationNumber)
1062 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or generation number before 'obj' or 'R' keyword");
1063 return false;
1066 if (bObj)
1068 pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(), pGenerationNumber->GetValue());
1069 rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1070 m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1071 m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1072 bInObject = true;
1074 else
1076 auto pReference = new PDFReferenceElement(*this, *pObjectNumber, *pGenerationNumber);
1077 rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1078 if (pArray)
1079 // Reference is part of a direct (non-dictionary) array, inform the array.
1080 pArray->PushBack(rElements.back().get());
1081 if (bInObject && nDictionaryDepth > 0 && pObject)
1082 // Inform the object about a new in-dictionary reference.
1083 pObject->AddDictionaryReference(pReference);
1085 if (!rElements.back()->Read(rStream))
1087 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFElement::Read() failed");
1088 return false;
1091 else if (aKeyword == "stream")
1093 // Look up the length of the stream from the parent object's dictionary.
1094 size_t nLength = 0;
1095 for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1097 // Iterate in reverse order.
1098 size_t nIndex = rElements.size() - nElement - 1;
1099 PDFElement* pElement = rElements[nIndex].get();
1100 auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1101 if (!pObj)
1102 continue;
1104 PDFElement* pLookup = pObj->Lookup("Length");
1105 auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1106 if (pReference)
1108 // Length is provided as a reference.
1109 nLength = pReference->LookupNumber(rStream);
1110 break;
1113 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1114 if (pNumber)
1116 // Length is provided directly.
1117 nLength = pNumber->GetValue();
1118 break;
1121 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: found no Length key for stream keyword");
1122 return false;
1125 PDFDocument::SkipLineBreaks(rStream);
1126 auto pStreamElement = new PDFStreamElement(nLength);
1127 if (pObject)
1128 pObject->SetStream(pStreamElement);
1129 rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1130 if (!rElements.back()->Read(rStream))
1132 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1133 return false;
1136 else if (aKeyword == "endstream")
1138 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1139 if (!rElements.back()->Read(rStream))
1141 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1142 return false;
1145 else if (aKeyword == "endobj")
1147 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1148 if (!rElements.back()->Read(rStream))
1150 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1151 return false;
1153 if (eMode == TokenizeMode::END_OF_OBJECT)
1155 // Found endobj and only object parsing was requested, we're done.
1156 return true;
1159 if (pObjectStream)
1161 // We're at the end of an object stream, parse the stored objects.
1162 pObjectStream->ParseStoredObjects();
1163 pObjectStream = nullptr;
1164 pObjectKey = nullptr;
1166 bInObject = false;
1168 else if (aKeyword == "true" || aKeyword == "false")
1169 rElements.push_back(std::unique_ptr<PDFElement>(new PDFBooleanElement(aKeyword.toBoolean())));
1170 else if (aKeyword == "null")
1171 rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1172 else if (aKeyword == "xref")
1173 // Allow 'f' and 'n' keywords.
1174 bInXRef = true;
1175 else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1178 else if (aKeyword == "trailer")
1180 auto pTrailer = new PDFTrailerElement(*this);
1182 // Make it possible to find this trailer later by offset.
1183 pTrailer->Read(rStream);
1184 m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1186 // When reading till the first EOF token only, remember
1187 // just the first trailer token.
1188 if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
1189 m_pTrailer = pTrailer;
1190 rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1192 else if (aKeyword == "startxref")
1194 bInStartXRef = true;
1196 else
1198 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '" << aKeyword << "' keyword at byte position " << rStream.Tell());
1199 return false;
1202 else
1204 if (!isspace(ch))
1206 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected character: " << ch << " at byte position " << rStream.Tell());
1207 return false;
1210 break;
1215 return true;
1218 void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
1220 m_aIDObjects[nID] = pObject;
1223 bool PDFDocument::Read(SvStream& rStream)
1225 // Check file magic.
1226 std::vector<sal_Int8> aHeader(5);
1227 rStream.Seek(0);
1228 rStream.ReadBytes(aHeader.data(), aHeader.size());
1229 if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F' || aHeader[4] != '-')
1231 SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1232 return false;
1235 // Allow later editing of the contents in-memory.
1236 rStream.Seek(0);
1237 m_aEditBuffer.WriteStream(rStream);
1239 // Look up the offset of the xref table.
1240 size_t nStartXRef = FindStartXRef(rStream);
1241 SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1242 if (nStartXRef == 0)
1244 SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1245 return false;
1247 while (true)
1249 rStream.Seek(nStartXRef);
1250 OString aKeyword = ReadKeyword(rStream);
1251 if (aKeyword.isEmpty())
1252 ReadXRefStream(rStream);
1254 else
1256 if (aKeyword != "xref")
1258 SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1259 return false;
1261 ReadXRef(rStream);
1262 if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1264 SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1265 return false;
1269 PDFNumberElement* pPrev = nullptr;
1270 if (m_pTrailer)
1272 pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1274 // Remember the offset of this trailer in the correct order. It's
1275 // possible that newer trailers don't have a larger offset.
1276 m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
1278 else if (m_pXRefStream)
1279 pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1280 if (pPrev)
1281 nStartXRef = pPrev->GetValue();
1283 // Reset state, except the edit buffer.
1284 m_aElements.clear();
1285 m_aOffsetObjects.clear();
1286 m_aIDObjects.clear();
1287 m_aStartXRefs.clear();
1288 m_aEOFs.clear();
1289 m_pTrailer = nullptr;
1290 m_pXRefStream = nullptr;
1291 if (!pPrev)
1292 break;
1295 // Then we can tokenize the stream.
1296 rStream.Seek(0);
1297 return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1300 OString PDFDocument::ReadKeyword(SvStream& rStream)
1302 OStringBuffer aBuf;
1303 char ch;
1304 rStream.ReadChar(ch);
1305 if (rStream.IsEof())
1306 return OString();
1307 while (isalpha(ch))
1309 aBuf.append(ch);
1310 rStream.ReadChar(ch);
1311 if (rStream.IsEof())
1312 return aBuf.toString();
1314 rStream.SeekRel(-1);
1315 return aBuf.toString();
1318 size_t PDFDocument::FindStartXRef(SvStream& rStream)
1320 // Find the "startxref" token, somewhere near the end of the document.
1321 std::vector<char> aBuf(1024);
1322 rStream.Seek(STREAM_SEEK_TO_END);
1323 if (rStream.Tell() > aBuf.size())
1324 rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1325 else
1326 // The document is really short, then just read it from the start.
1327 rStream.Seek(0);
1328 size_t nBeforePeek = rStream.Tell();
1329 size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1330 rStream.Seek(nBeforePeek);
1331 if (nSize != aBuf.size())
1332 aBuf.resize(nSize);
1333 OString aPrefix("startxref");
1334 // Find the last startxref at the end of the document.
1335 auto itLastValid = aBuf.end();
1336 auto it = aBuf.begin();
1337 while (true)
1339 it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1340 if (it == aBuf.end())
1341 break;
1342 else
1344 itLastValid = it;
1345 ++it;
1348 if (itLastValid == aBuf.end())
1350 SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1351 return 0;
1354 rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1355 if (rStream.IsEof())
1357 SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1358 return 0;
1361 PDFDocument::SkipWhitespace(rStream);
1362 PDFNumberElement aNumber;
1363 if (!aNumber.Read(rStream))
1364 return 0;
1365 return aNumber.GetValue();
1368 void PDFDocument::ReadXRefStream(SvStream& rStream)
1370 // Look up the stream length in the object dictionary.
1371 if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1373 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1374 return;
1377 if (m_aElements.empty())
1379 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1380 return;
1383 PDFObjectElement* pObject = nullptr;
1384 for (const auto& pElement : m_aElements)
1386 if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1388 pObject = pObj;
1389 break;
1392 if (!pObject)
1394 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1395 return;
1398 // So that the Prev key can be looked up later.
1399 m_pXRefStream = pObject;
1401 PDFElement* pLookup = pObject->Lookup("Length");
1402 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1403 if (!pNumber)
1405 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1406 return;
1408 sal_uInt64 nLength = pNumber->GetValue();
1410 // Look up the stream offset.
1411 PDFStreamElement* pStream = nullptr;
1412 for (const auto& pElement : m_aElements)
1414 if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1416 pStream = pS;
1417 break;
1420 if (!pStream)
1422 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1423 return;
1426 // Read and decompress it.
1427 rStream.Seek(pStream->GetOffset());
1428 std::vector<char> aBuf(nLength);
1429 rStream.ReadBytes(aBuf.data(), aBuf.size());
1431 auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1432 if (!pFilter)
1434 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1435 return;
1438 if (pFilter->GetValue() != "FlateDecode")
1440 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1441 return;
1444 int nColumns = 1;
1445 int nPredictor = 1;
1446 if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1448 const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1449 auto it = rItems.find("Columns");
1450 if (it != rItems.end())
1451 if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1452 nColumns = pColumns->GetValue();
1453 it = rItems.find("Predictor");
1454 if (it != rItems.end())
1455 if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1456 nPredictor = pPredictor->GetValue();
1459 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1460 SvMemoryStream aStream;
1461 ZCodec aZCodec;
1462 aZCodec.BeginCompression();
1463 aZCodec.Decompress(aSource, aStream);
1464 if (!aZCodec.EndCompression())
1466 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1467 return;
1470 // Look up the first and the last entry we need to read.
1471 auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
1472 std::vector<size_t> aFirstObjects;
1473 std::vector<size_t> aNumberOfObjects;
1474 if (!pIndex)
1476 auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1477 if (pSize)
1479 aFirstObjects.push_back(0);
1480 aNumberOfObjects.push_back(pSize->GetValue());
1482 else
1484 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1485 return;
1488 else
1490 const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1491 size_t nFirstObject = 0;
1492 for (size_t i = 0; i < rIndexElements.size(); ++i)
1494 if (i % 2 == 0)
1496 auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1497 if (!pFirstObject)
1499 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index has no first object");
1500 return;
1502 nFirstObject = pFirstObject->GetValue();
1503 continue;
1506 auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1507 if (!pNumberOfObjects)
1509 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index has no number of objects");
1510 return;
1512 aFirstObjects.push_back(nFirstObject);
1513 aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1517 // Look up the format of a single entry.
1518 const int nWSize = 3;
1519 auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1520 if (!pW || pW->GetElements().size() < nWSize)
1522 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1523 return;
1525 int aW[nWSize];
1526 // First character is the (kind of) repeated predictor.
1527 int nLineLength = 1;
1528 for (size_t i = 0; i < nWSize; ++i)
1530 auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1531 if (!pI)
1533 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1534 return;
1536 aW[i] = pI->GetValue();
1537 nLineLength += aW[i];
1540 if (nPredictor > 1 && nLineLength - 1 != nColumns)
1542 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1543 return;
1546 aStream.Seek(0);
1547 for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1549 size_t nFirstObject = aFirstObjects[nSubSection];
1550 size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1552 // This is the line as read from the stream.
1553 std::vector<unsigned char> aOrigLine(nLineLength);
1554 // This is the line as it appears after tweaking according to nPredictor.
1555 std::vector<unsigned char> aFilteredLine(nLineLength);
1556 for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1558 size_t nIndex = nFirstObject + nEntry;
1560 aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
1561 if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1563 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is inconsistent with /DecodeParms/Predictor for object #" << nIndex);
1564 return;
1567 for (int i = 0; i < nLineLength; ++i)
1569 switch (nPredictor)
1571 case 1:
1572 // No prediction.
1573 break;
1574 case 12:
1575 // PNG prediction: up (on all rows).
1576 aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1577 break;
1578 default:
1579 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: " << nPredictor);
1580 return;
1581 break;
1585 // First character is already handled above.
1586 int nPos = 1;
1587 size_t nType = 0;
1588 // Start of the current field in the stream data.
1589 int nOffset = nPos;
1590 for (; nPos < nOffset + aW[0]; ++nPos)
1592 unsigned char nCh = aFilteredLine[nPos];
1593 nType = (nType << 8) + nCh;
1596 // Start of the object in the file stream.
1597 size_t nStreamOffset = 0;
1598 nOffset = nPos;
1599 for (; nPos < nOffset + aW[1]; ++nPos)
1601 unsigned char nCh = aFilteredLine[nPos];
1602 nStreamOffset = (nStreamOffset << 8) + nCh;
1605 // Generation number of the object.
1606 size_t nGenerationNumber = 0;
1607 nOffset = nPos;
1608 for (; nPos < nOffset + aW[2]; ++nPos)
1610 unsigned char nCh = aFilteredLine[nPos];
1611 nGenerationNumber = (nGenerationNumber << 8) + nCh;
1614 // Ignore invalid nType.
1615 if (nType <= 2)
1617 if (m_aXRef.find(nIndex) == m_aXRef.end())
1619 XRefEntry aEntry;
1620 switch (nType)
1622 case 0:
1623 aEntry.m_eType = XRefEntryType::FREE;
1624 break;
1625 case 1:
1626 aEntry.m_eType = XRefEntryType::NOT_COMPRESSED;
1627 break;
1628 case 2:
1629 aEntry.m_eType = XRefEntryType::COMPRESSED;
1630 break;
1632 aEntry.m_nOffset = nStreamOffset;
1633 aEntry.m_nGenerationNumber = nGenerationNumber;
1634 m_aXRef[nIndex] = aEntry;
1641 void PDFDocument::ReadXRef(SvStream& rStream)
1643 PDFDocument::SkipWhitespace(rStream);
1645 while (true)
1647 PDFNumberElement aFirstObject;
1648 if (!aFirstObject.Read(rStream))
1650 // Next token is not a number, it'll be the trailer.
1651 return;
1654 if (aFirstObject.GetValue() < 0)
1656 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1657 return;
1660 PDFDocument::SkipWhitespace(rStream);
1661 PDFNumberElement aNumberOfEntries;
1662 if (!aNumberOfEntries.Read(rStream))
1664 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1665 return;
1668 if (aNumberOfEntries.GetValue() < 0)
1670 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1671 return;
1674 size_t nSize = aNumberOfEntries.GetValue();
1675 for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1677 size_t nIndex = aFirstObject.GetValue() + nEntry;
1678 PDFDocument::SkipWhitespace(rStream);
1679 PDFNumberElement aOffset;
1680 if (!aOffset.Read(rStream))
1682 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1683 return;
1686 PDFDocument::SkipWhitespace(rStream);
1687 PDFNumberElement aGenerationNumber;
1688 if (!aGenerationNumber.Read(rStream))
1690 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1691 return;
1694 PDFDocument::SkipWhitespace(rStream);
1695 OString aKeyword = ReadKeyword(rStream);
1696 if (aKeyword != "f" && aKeyword != "n")
1698 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1699 return;
1701 // xrefs are read in reverse order, so never update an existing
1702 // offset with an older one.
1703 if (m_aXRef.find(nIndex) == m_aXRef.end())
1705 XRefEntry aEntry;
1706 aEntry.m_nOffset = aOffset.GetValue();
1707 aEntry.m_nGenerationNumber = aGenerationNumber.GetValue();
1708 // Initially only the first entry is dirty.
1709 if (nIndex == 0)
1710 aEntry.m_bDirty = true;
1711 m_aXRef[nIndex] = aEntry;
1713 PDFDocument::SkipWhitespace(rStream);
1718 void PDFDocument::SkipWhitespace(SvStream& rStream)
1720 char ch = 0;
1722 while (true)
1724 rStream.ReadChar(ch);
1725 if (rStream.IsEof())
1726 break;
1728 if (!isspace(ch))
1730 rStream.SeekRel(-1);
1731 return;
1736 void PDFDocument::SkipLineBreaks(SvStream& rStream)
1738 char ch = 0;
1740 while (true)
1742 rStream.ReadChar(ch);
1743 if (rStream.IsEof())
1744 break;
1746 if (ch != '\n' && ch != '\r')
1748 rStream.SeekRel(-1);
1749 return;
1754 size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1756 auto it = m_aXRef.find(nIndex);
1757 if (it == m_aXRef.end() || it->second.m_eType == XRefEntryType::COMPRESSED)
1759 SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #" << nIndex << ", but failed");
1760 return 0;
1763 return it->second.m_nOffset;
1766 const std::vector< std::unique_ptr<PDFElement> >& PDFDocument::GetElements()
1768 return m_aElements;
1771 /// Visits the page tree recursively, looking for page objects.
1772 static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1774 auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids"));
1775 if (!pKids)
1777 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1778 return;
1781 pPages->setVisiting(true);
1783 for (const auto& pKid : pKids->GetElements())
1785 auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1786 if (!pReference)
1787 continue;
1789 PDFObjectElement* pKidObject = pReference->LookupObject();
1790 if (!pKidObject)
1791 continue;
1793 // detect if visiting reenters itself
1794 if (pKidObject->alreadyVisiting())
1796 SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1797 continue;
1800 auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
1801 if (pName && pName->GetValue() == "Pages")
1802 // Pages inside pages: recurse.
1803 visitPages(pKidObject, rRet);
1804 else
1805 // Found an actual page.
1806 rRet.push_back(pKidObject);
1809 pPages->setVisiting(false);
1812 std::vector<PDFObjectElement*> PDFDocument::GetPages()
1814 std::vector<PDFObjectElement*> aRet;
1816 PDFReferenceElement* pRoot = nullptr;
1819 PDFTrailerElement* pTrailer = nullptr;
1820 if (!m_aTrailerOffsets.empty())
1822 // Get access to the latest trailer, and work with the keys of that
1823 // one.
1824 auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
1825 if (it != m_aOffsetTrailers.end())
1826 pTrailer = it->second;
1829 if (pTrailer)
1830 pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
1831 else if (m_pXRefStream)
1832 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
1834 if (!pRoot)
1836 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no Root key");
1837 return aRet;
1840 PDFObjectElement* pCatalog = pRoot->LookupObject();
1841 if (!pCatalog)
1843 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1844 return aRet;
1847 PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
1848 if (!pPages)
1850 SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue() << ") has no pages");
1851 return aRet;
1854 visitPages(pPages, aRet);
1856 return aRet;
1859 void PDFDocument::PushBackEOF(size_t nOffset)
1861 m_aEOFs.push_back(nOffset);
1864 std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
1866 std::vector<PDFObjectElement*> aRet;
1868 std::vector<PDFObjectElement*> aPages = GetPages();
1870 for (const auto& pPage : aPages)
1872 if (!pPage)
1873 continue;
1875 PDFElement* pAnnotsElement = pPage->Lookup("Annots");
1876 auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
1877 if (!pAnnots)
1879 // Annots is not an array, see if it's a reference to an object
1880 // with a direct array.
1881 auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
1882 if (pAnnotsRef)
1884 if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
1886 pAnnots = pAnnotsObject->GetArray();
1891 if (!pAnnots)
1892 continue;
1894 for (const auto& pAnnot : pAnnots->GetElements())
1896 auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
1897 if (!pReference)
1898 continue;
1900 PDFObjectElement* pAnnotObject = pReference->LookupObject();
1901 if (!pAnnotObject)
1902 continue;
1904 auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
1905 if (!pFT || pFT->GetValue() != "Sig")
1906 continue;
1908 aRet.push_back(pAnnotObject);
1912 return aRet;
1915 int PDFDocument::AsHex(char ch)
1917 int nRet = 0;
1918 if (isdigit(ch))
1919 nRet = ch - '0';
1920 else
1922 if (ch >= 'a' && ch <= 'f')
1923 nRet = ch - 'a';
1924 else if (ch >= 'A' && ch <= 'F')
1925 nRet = ch - 'A';
1926 else
1927 return -1;
1928 nRet += 10;
1930 return nRet;
1933 std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement* pElement)
1935 std::vector<unsigned char> aRet;
1936 const OString& rHex = pElement->GetValue();
1937 size_t nHexLen = rHex.getLength();
1939 int nByte = 0;
1940 int nCount = 2;
1941 for (size_t i = 0; i < nHexLen; ++i)
1943 nByte = nByte << 4;
1944 sal_Int8 nParsed = AsHex(rHex[i]);
1945 if (nParsed == -1)
1947 SAL_WARN("vcl.filter", "PDFDocument::DecodeHexString: invalid hex value");
1948 return aRet;
1950 nByte += nParsed;
1951 --nCount;
1952 if (!nCount)
1954 aRet.push_back(nByte);
1955 nCount = 2;
1956 nByte = 0;
1961 return aRet;
1964 PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)
1965 : m_rDoc(rDoc)
1969 bool PDFCommentElement::Read(SvStream& rStream)
1971 // Read from (including) the % char till (excluding) the end of the line/stream.
1972 OStringBuffer aBuf;
1973 char ch;
1974 rStream.ReadChar(ch);
1975 while (true)
1977 if (ch == '\n' || ch == '\r' || rStream.IsEof())
1979 m_aComment = aBuf.makeStringAndClear();
1981 if (m_aComment.startsWith("%%EOF"))
1982 m_rDoc.PushBackEOF(rStream.Tell());
1984 SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
1985 return true;
1987 aBuf.append(ch);
1988 rStream.ReadChar(ch);
1991 return false;
1994 PDFNumberElement::PDFNumberElement() = default;
1996 bool PDFNumberElement::Read(SvStream& rStream)
1998 OStringBuffer aBuf;
1999 m_nOffset = rStream.Tell();
2000 char ch;
2001 rStream.ReadChar(ch);
2002 if (rStream.IsEof())
2004 return false;
2006 if (!isdigit(ch) && ch != '-' && ch != '.')
2008 rStream.SeekRel(-1);
2009 return false;
2011 while (!rStream.IsEof())
2013 if (!isdigit(ch) && ch != '-' && ch != '.')
2015 rStream.SeekRel(-1);
2016 m_nLength = rStream.Tell() - m_nOffset;
2017 m_fValue = aBuf.makeStringAndClear().toDouble();
2018 SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2019 return true;
2021 aBuf.append(ch);
2022 rStream.ReadChar(ch);
2025 return false;
2028 sal_uInt64 PDFNumberElement::GetLocation() const
2030 return m_nOffset;
2033 sal_uInt64 PDFNumberElement::GetLength() const
2035 return m_nLength;
2038 PDFBooleanElement::PDFBooleanElement(bool /*bValue*/)
2042 bool PDFBooleanElement::Read(SvStream& /*rStream*/)
2044 return true;
2047 bool PDFNullElement::Read(SvStream& /*rStream*/)
2049 return true;
2052 bool PDFHexStringElement::Read(SvStream& rStream)
2054 char ch;
2055 rStream.ReadChar(ch);
2056 if (ch != '<')
2058 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2059 return false;
2061 rStream.ReadChar(ch);
2063 OStringBuffer aBuf;
2064 while (!rStream.IsEof())
2066 if (ch == '>')
2068 m_aValue = aBuf.makeStringAndClear();
2069 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2070 return true;
2072 aBuf.append(ch);
2073 rStream.ReadChar(ch);
2076 return false;
2079 const OString& PDFHexStringElement::GetValue() const
2081 return m_aValue;
2084 bool PDFLiteralStringElement::Read(SvStream& rStream)
2086 char nPrevCh = 0;
2087 char ch = 0;
2088 rStream.ReadChar(ch);
2089 if (ch != '(')
2091 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2092 return false;
2094 nPrevCh = ch;
2095 rStream.ReadChar(ch);
2097 OStringBuffer aBuf;
2098 while (!rStream.IsEof())
2100 if (ch == ')' && nPrevCh != '\\')
2102 m_aValue = aBuf.makeStringAndClear();
2103 SAL_INFO("vcl.filter", "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2104 return true;
2106 aBuf.append(ch);
2107 nPrevCh = ch;
2108 rStream.ReadChar(ch);
2111 return false;
2114 const OString& PDFLiteralStringElement::GetValue() const
2116 return m_aValue;
2119 PDFTrailerElement::PDFTrailerElement(PDFDocument& rDoc)
2120 : m_rDoc(rDoc)
2124 bool PDFTrailerElement::Read(SvStream& rStream)
2126 m_nOffset = rStream.Tell();
2127 return true;
2130 PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2132 if (m_aDictionary.empty())
2133 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2135 return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2138 sal_uInt64 PDFTrailerElement::GetLocation() const
2140 return m_nOffset;
2143 double PDFNumberElement::GetValue() const
2145 return m_fValue;
2148 PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2149 : m_rDoc(rDoc),
2150 m_fObjectValue(fObjectValue),
2151 m_fGenerationValue(fGenerationValue),
2152 m_pNumberElement(nullptr),
2153 m_nDictionaryOffset(0),
2154 m_nDictionaryLength(0),
2155 m_pDictionaryElement(nullptr),
2156 m_nArrayOffset(0),
2157 m_nArrayLength(0),
2158 m_pArrayElement(nullptr),
2159 m_pStreamElement(nullptr)
2163 bool PDFObjectElement::Read(SvStream& /*rStream*/)
2165 SAL_INFO("vcl.filter", "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2166 return true;
2169 PDFDictionaryElement::PDFDictionaryElement() = default;
/// Builds the key -> value map of the dictionary that belongs to pThis.
///
/// Scans rElements starting at pThis (or at index 0 when pThis is not one of
/// the elements) and fills rDictionary. A non-empty rDictionary is treated as
/// already parsed and is left untouched. pThis is flagged via setParsing() for
/// the duration of the scan, and nested dictionaries that report
/// alreadyParsing() are not recursed into.
///
/// @param rElements   token list to scan.
/// @param pThis       object or dictionary element whose dictionary is wanted.
/// @param rDictionary output map; values are non-owning pointers to elements of rElements.
/// @return index of the '>>' element that closed the outermost dictionary, or
///         0 when no such element was reached.
2171 size_t PDFDictionaryElement::Parse(const std::vector< std::unique_ptr<PDFElement> >& rElements, PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary)
2173 // The index of last parsed element, in case of nested dictionaries.
2174 size_t nRet = 0;
// Already parsed earlier: nothing to do.
2176 if (!rDictionary.empty())
2177 return nRet;
2179 pThis->setParsing(true);
2181 auto pThisObject = dynamic_cast<PDFObjectElement*>(pThis);
2182 // This is set to non-nullptr here for nested dictionaries only.
2183 auto pThisDictionary = dynamic_cast<PDFDictionaryElement*>(pThis);
2185 // Find out where the dictionary for this object starts.
2186 size_t nIndex = 0;
2187 for (size_t i = 0; i < rElements.size(); ++i)
2189 if (rElements[i].get() == pThis)
2191 nIndex = i;
2192 break;
// Pending key: aName is empty whenever no key is waiting for its value.
2196 OString aName;
2197 sal_uInt64 nNameOffset = 0;
// Numbers seen since the last consumed value; kept until it is clear
// whether they are a plain value, array items or part of a reference.
2198 std::vector<PDFNumberElement*> aNumbers;
2199 // The array value we're in -- if any.
2200 PDFArrayElement* pArray = nullptr;
2201 sal_uInt64 nDictionaryOffset = 0;
2202 int nDictionaryDepth = 0;
2203 // Toplevel dictionary found (not inside an array).
2204 bool bDictionaryFound = false;
2205 // Toplevel array found (not inside a dictionary).
2206 bool bArrayFound = false;
2207 for (size_t i = nIndex; i < rElements.size(); ++i)
2209 // Dictionary tokens can be nested, track enter/leave.
2210 if (auto pDictionary = dynamic_cast<PDFDictionaryElement*>(rElements[i].get()))
2212 bDictionaryFound = true;
2213 if (++nDictionaryDepth == 1)
2215 // First dictionary start, track start offset.
2216 nDictionaryOffset = pDictionary->m_nLocation;
2217 if (pThisObject)
2219 if (!bArrayFound)
2220 // The toplevel dictionary of the object.
2221 pThisObject->SetDictionary(pDictionary);
2222 pThisDictionary = pDictionary;
2223 pThisObject->SetDictionaryOffset(nDictionaryOffset);
2226 else if (!pDictionary->alreadyParsing())
2228 // Nested dictionary.
2229 const size_t nexti
2230 = PDFDictionaryElement::Parse(rElements, pDictionary, pDictionary->m_aItems);
2231 if (nexti >= i) // ensure we go forwards and not endlessly loop
2233 i = nexti;
2234 rDictionary[aName] = pDictionary;
2235 aName.clear();
2240 if (auto pEndDictionary = dynamic_cast<PDFEndDictionaryElement*>(rElements[i].get()))
2242 if (--nDictionaryDepth == 0)
2244 // Last dictionary end, track length and stop parsing.
2245 if (pThisObject)
2246 pThisObject->SetDictionaryLength(pEndDictionary->GetLocation() - nDictionaryOffset);
2247 nRet = i;
2248 break;
2252 auto pName = dynamic_cast<PDFNameElement*>(rElements[i].get());
2253 if (pName)
// A name following buffered numbers ends the previous key's value:
// the last buffered number becomes that value.
2255 if (!aNumbers.empty())
2257 PDFNumberElement* pNumber = aNumbers.back();
2258 rDictionary[aName] = pNumber;
2259 if (pThisDictionary)
2261 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2262 pThisDictionary->SetKeyValueLength(aName, pNumber->GetLocation() + pNumber->GetLength() - nNameOffset);
2264 aName.clear();
2265 aNumbers.clear();
2268 if (aName.isEmpty())
2270 // Remember key.
2271 aName = pName->GetValue();
2272 nNameOffset = pName->GetLocation();
2274 else
2276 if (pArray)
2278 if (bDictionaryFound)
2279 // Array inside dictionary.
2280 pArray->PushBack(pName);
2282 else
2284 // Name-name key-value.
2285 rDictionary[aName] = pName;
2286 if (pThisDictionary)
2288 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2289 pThisDictionary->SetKeyValueLength(aName, pName->GetLocation() + pName->GetLength() - nNameOffset);
2291 aName.clear();
2294 continue;
2297 auto pArr = dynamic_cast<PDFArrayElement*>(rElements[i].get());
2298 if (pArr)
2300 bArrayFound = true;
2301 pArray = pArr;
2302 continue;
2305 auto pEndArr = dynamic_cast<PDFEndArrayElement*>(rElements[i].get());
2306 if (pArray && pEndArr)
// Array closed: buffered numbers become array items, then the array
// itself is stored as the value of the pending key.
2308 if (!aNumbers.empty())
2310 for (auto& pNumber : aNumbers)
2311 pArray->PushBack(pNumber);
2312 aNumbers.clear();
2314 rDictionary[aName] = pArray;
2315 if (pThisDictionary)
2317 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2318 // Include the ending ']' in the length of the key - (array)value pair length.
2319 pThisDictionary->SetKeyValueLength(aName, pEndArr->GetOffset() - nNameOffset + 1);
2321 aName.clear();
2322 pArray = nullptr;
2323 continue;
2326 auto pReference = dynamic_cast<PDFReferenceElement*>(rElements[i].get());
2327 if (pReference)
2329 if (!pArray)
2331 rDictionary[aName] = pReference;
2332 if (pThisDictionary)
2334 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2335 pThisDictionary->SetKeyValueLength(aName, pReference->GetOffset() - nNameOffset);
2337 aName.clear();
2339 else
2341 if (bDictionaryFound)
2342 // Array inside dictionary.
2343 pArray->PushBack(pReference);
// The buffered numbers are dropped once a reference has been seen.
2345 aNumbers.clear();
2346 continue;
2349 auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(rElements[i].get());
2350 if (pLiteralString)
2352 rDictionary[aName] = pLiteralString;
2353 if (pThisDictionary)
2354 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2355 aName.clear();
2356 continue;
2359 auto pBoolean = dynamic_cast<PDFBooleanElement*>(rElements[i].get());
2360 if (pBoolean)
2362 rDictionary[aName] = pBoolean;
2363 if (pThisDictionary)
2364 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2365 aName.clear();
2366 continue;
2369 auto pHexString = dynamic_cast<PDFHexStringElement*>(rElements[i].get());
2370 if (pHexString)
2372 if (!pArray)
2374 rDictionary[aName] = pHexString;
2375 if (pThisDictionary)
2376 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2377 aName.clear();
2379 else
2381 pArray->PushBack(pHexString);
2383 continue;
// Reaching the end-of-object element stops the scan.
2386 if (dynamic_cast<PDFEndObjectElement*>(rElements[i].get()))
2387 break;
2389 // Just remember this, so that in case it's not a reference parameter,
2390 // we can handle it later.
2391 auto pNumber = dynamic_cast<PDFNumberElement*>(rElements[i].get());
2392 if (pNumber)
2393 aNumbers.push_back(pNumber);
// Numbers still buffered when the scan ends: the last one becomes the
// value of the pending key.
2396 if (!aNumbers.empty())
2398 rDictionary[aName] = aNumbers.back();
2399 if (pThisDictionary)
2400 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2401 aName.clear();
2402 aNumbers.clear();
2405 pThis->setParsing(false);
2407 return nRet;
2410 PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary, const OString& rKey)
2412 auto it = rDictionary.find(rKey);
2413 if (it == rDictionary.end())
2414 return nullptr;
2416 return it->second;
2419 PDFObjectElement* PDFDictionaryElement::LookupObject(const OString& rDictionaryKey)
2421 auto pKey = dynamic_cast<PDFReferenceElement*>(PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2422 if (!pKey)
2424 SAL_WARN("vcl.filter", "PDFDictionaryElement::LookupObject: no such key with reference value: " << rDictionaryKey);
2425 return nullptr;
2428 return pKey->LookupObject();
2431 PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
2433 return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2436 PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2438 if (m_aDictionary.empty())
2440 if (!m_aElements.empty())
2441 // This is a stored object in an object stream.
2442 PDFDictionaryElement::Parse(m_aElements, this, m_aDictionary);
2443 else
2444 // Normal object: elements are stored as members of the document itself.
2445 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2448 return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2451 PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
2453 auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2454 if (!pKey)
2456 SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: " << rDictionaryKey);
2457 return nullptr;
2460 return pKey->LookupObject();
2463 double PDFObjectElement::GetObjectValue() const
2465 return m_fObjectValue;
2468 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2470 m_nDictionaryOffset = nDictionaryOffset;
2473 sal_uInt64 PDFObjectElement::GetDictionaryOffset()
2475 if (m_aDictionary.empty())
2476 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2478 return m_nDictionaryOffset;
2481 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset)
2483 m_nArrayOffset = nArrayOffset;
2486 sal_uInt64 PDFObjectElement::GetArrayOffset()
2488 return m_nArrayOffset;
2491 void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2493 m_aDictionaryKeyOffset[rKey] = nOffset;
2496 void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2498 m_aDictionaryKeyValueLength[rKey] = nLength;
2501 sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2503 auto it = m_aDictionaryKeyOffset.find(rKey);
2504 if (it == m_aDictionaryKeyOffset.end())
2505 return 0;
2507 return it->second;
2510 sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2512 auto it = m_aDictionaryKeyValueLength.find(rKey);
2513 if (it == m_aDictionaryKeyValueLength.end())
2514 return 0;
2516 return it->second;
2519 const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const
2521 return m_aItems;
2524 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2526 m_nDictionaryLength = nDictionaryLength;
2529 sal_uInt64 PDFObjectElement::GetDictionaryLength()
2531 if (m_aDictionary.empty())
2532 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2534 return m_nDictionaryLength;
2537 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength)
2539 m_nArrayLength = nArrayLength;
2542 sal_uInt64 PDFObjectElement::GetArrayLength()
2544 return m_nArrayLength;
2547 PDFDictionaryElement* PDFObjectElement::GetDictionary()
2549 if (m_aDictionary.empty())
2550 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2551 return m_pDictionaryElement;
2554 void PDFObjectElement::SetDictionary(PDFDictionaryElement* pDictionaryElement)
2556 m_pDictionaryElement = pDictionaryElement;
2559 void PDFObjectElement::SetNumberElement(PDFNumberElement* pNumberElement)
2561 m_pNumberElement = pNumberElement;
2564 PDFNumberElement* PDFObjectElement::GetNumberElement() const
2566 return m_pNumberElement;
2569 const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2571 return m_aDictionaryReferences;
2574 void PDFObjectElement::AddDictionaryReference(PDFReferenceElement* pReference)
2576 m_aDictionaryReferences.push_back(pReference);
2579 const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2581 if (m_aDictionary.empty())
2582 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2584 return m_aDictionary;
2587 void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement)
2589 m_pArrayElement = pArrayElement;
2592 void PDFObjectElement::SetStream(PDFStreamElement* pStreamElement)
2594 m_pStreamElement = pStreamElement;
2597 PDFStreamElement* PDFObjectElement::GetStream() const
2599 return m_pStreamElement;
2602 PDFArrayElement* PDFObjectElement::GetArray() const
2604 return m_pArrayElement;
2607 void PDFObjectElement::ParseStoredObjects()
2609 if (!m_pStreamElement)
2611 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2612 return;
2615 auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2616 if (!pType || pType->GetValue() != "ObjStm")
2618 if (!pType)
2619 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2620 else
2621 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2622 return;
2625 auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2626 if (!pFilter || pFilter->GetValue() != "FlateDecode")
2628 if (!pFilter)
2629 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2630 else
2631 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2632 return;
2635 auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2636 if (!pFirst)
2638 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2639 return;
2642 auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2643 if (!pN)
2645 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2646 return;
2648 size_t nN = pN->GetValue();
2650 auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2651 if (!pLength)
2653 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2654 return;
2656 size_t nLength = pLength->GetValue();
2658 // Read and decompress it.
2659 SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2660 rEditBuffer.Seek(m_pStreamElement->GetOffset());
2661 std::vector<char> aBuf(nLength);
2662 rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2663 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2664 SvMemoryStream aStream;
2665 ZCodec aZCodec;
2666 aZCodec.BeginCompression();
2667 aZCodec.Decompress(aSource, aStream);
2668 if (!aZCodec.EndCompression())
2670 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2671 return;
2674 aStream.Seek(STREAM_SEEK_TO_END);
2675 nLength = aStream.Tell();
2676 aStream.Seek(0);
2677 std::vector<size_t> aObjNums;
2678 std::vector<size_t> aOffsets;
2679 std::vector<size_t> aLengths;
2680 // First iterate over and find out the lengths.
2681 for (size_t nObject = 0; nObject < nN; ++nObject)
2683 PDFNumberElement aObjNum;
2684 if (!aObjNum.Read(aStream))
2686 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: failed to read object number");
2687 return;
2689 aObjNums.push_back(aObjNum.GetValue());
2691 PDFDocument::SkipWhitespace(aStream);
2693 PDFNumberElement aByteOffset;
2694 if (!aByteOffset.Read(aStream))
2696 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2697 return;
2699 aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2701 if (aOffsets.size() > 1)
2702 aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2703 if (nObject + 1 == nN)
2704 aLengths.push_back(nLength - aOffsets.back());
2706 PDFDocument::SkipWhitespace(aStream);
2709 // Now create streams with the proper length and tokenize the data.
2710 for (size_t nObject = 0; nObject < nN; ++nObject)
2712 size_t nObjNum = aObjNums[nObject];
2713 size_t nOffset = aOffsets[nObject];
2714 size_t nLen = aLengths[nObject];
2716 aStream.Seek(nOffset);
2717 m_aStoredElements.push_back(o3tl::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2718 PDFObjectElement* pStored = m_aStoredElements.back().get();
2720 aBuf.clear();
2721 aBuf.resize(nLen);
2722 aStream.ReadBytes(aBuf.data(), aBuf.size());
2723 SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2725 m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(), pStored);
2726 // This is how references know the object is stored inside this object stream.
2727 m_rDoc.SetIDObject(nObjNum, pStored);
2729 // Store the stream of the object in the object stream for later use.
2730 std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2731 aStoredStream.Seek(0);
2732 pStreamBuffer->WriteStream(aStoredStream);
2733 pStored->SetStreamBuffer(pStreamBuffer);
2737 std::vector< std::unique_ptr<PDFElement> >& PDFObjectElement::GetStoredElements()
2739 return m_aElements;
2742 SvMemoryStream* PDFObjectElement::GetStreamBuffer() const
2744 return m_pStreamBuffer.get();
2747 void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2749 m_pStreamBuffer = std::move(pStreamBuffer);
2752 PDFDocument& PDFObjectElement::GetDocument()
2754 return m_rDoc;
2757 PDFReferenceElement::PDFReferenceElement(PDFDocument& rDoc, PDFNumberElement& rObject, PDFNumberElement& rGeneration)
2758 : m_rDoc(rDoc),
2759 m_fObjectValue(rObject.GetValue()),
2760 m_fGenerationValue(rGeneration.GetValue()),
2761 m_rObject(rObject)
2765 PDFNumberElement& PDFReferenceElement::GetObjectElement() const
2767 return m_rObject;
2770 bool PDFReferenceElement::Read(SvStream& rStream)
2772 SAL_INFO("vcl.filter", "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2773 m_nOffset = rStream.Tell();
2774 return true;
2777 sal_uInt64 PDFReferenceElement::GetOffset() const
2779 return m_nOffset;
2782 double PDFReferenceElement::LookupNumber(SvStream& rStream) const
2784 size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2785 if (nOffset == 0)
2787 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #" << m_fObjectValue);
2788 return 0;
2791 sal_uInt64 nOrigPos = rStream.Tell();
2792 comphelper::ScopeGuard g([&]()
2794 rStream.Seek(nOrigPos);
2797 rStream.Seek(nOffset);
2799 PDFDocument::SkipWhitespace(rStream);
2800 PDFNumberElement aNumber;
2801 bool bRet = aNumber.Read(rStream);
2802 if (!bRet || aNumber.GetValue() != m_fObjectValue)
2804 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: offset points to not matching object");
2805 return 0;
2810 PDFDocument::SkipWhitespace(rStream);
2811 PDFNumberElement aNumber;
2812 bool bRet = aNumber.Read(rStream);
2813 if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2815 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2816 return 0;
2821 PDFDocument::SkipWhitespace(rStream);
2822 OString aKeyword = PDFDocument::ReadKeyword(rStream);
2823 if (aKeyword != "obj")
2825 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2826 return 0;
2830 PDFDocument::SkipWhitespace(rStream);
2831 PDFNumberElement aNumber;
2832 if (!aNumber.Read(rStream))
2834 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: failed to read referenced number");
2835 return 0;
2838 return aNumber.GetValue();
2841 PDFObjectElement* PDFReferenceElement::LookupObject()
2843 return m_rDoc.LookupObject(m_fObjectValue);
2846 PDFObjectElement* PDFDocument::LookupObject(size_t nObjectNumber)
2848 auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2850 if (itIDObjects != m_aIDObjects.end())
2851 return itIDObjects->second;
2853 SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2854 return nullptr;
2857 SvMemoryStream& PDFDocument::GetEditBuffer()
2859 return m_aEditBuffer;
2862 int PDFReferenceElement::GetObjectValue() const
2864 return m_fObjectValue;
2867 int PDFReferenceElement::GetGenerationValue() const
2869 return m_fGenerationValue;
2872 bool PDFDictionaryElement::Read(SvStream& rStream)
2874 char ch;
2875 rStream.ReadChar(ch);
2876 if (ch != '<')
2878 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2879 return false;
2882 if (rStream.IsEof())
2884 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2885 return false;
2888 rStream.ReadChar(ch);
2889 if (ch != '<')
2891 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2892 return false;
2895 m_nLocation = rStream.Tell();
2897 SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2899 return true;
2902 PDFEndDictionaryElement::PDFEndDictionaryElement() = default;
2904 sal_uInt64 PDFEndDictionaryElement::GetLocation() const
2906 return m_nLocation;
2909 bool PDFEndDictionaryElement::Read(SvStream& rStream)
2911 m_nLocation = rStream.Tell();
2912 char ch;
2913 rStream.ReadChar(ch);
2914 if (ch != '>')
2916 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2917 return false;
2920 if (rStream.IsEof())
2922 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2923 return false;
2926 rStream.ReadChar(ch);
2927 if (ch != '>')
2929 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2930 return false;
2933 SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2935 return true;
2938 PDFNameElement::PDFNameElement()
2939 : m_nLocation(0),
2940 m_nLength(0)
2944 bool PDFNameElement::Read(SvStream& rStream)
2946 char ch;
2947 rStream.ReadChar(ch);
2948 if (ch != '/')
2950 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2951 return false;
2953 m_nLocation = rStream.Tell();
2955 if (rStream.IsEof())
2957 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2958 return false;
2961 // Read till the first white-space.
2962 OStringBuffer aBuf;
2963 rStream.ReadChar(ch);
2964 while (!rStream.IsEof())
2966 if (isspace(ch) || ch == '/' || ch == '[' || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2968 rStream.SeekRel(-1);
2969 m_aValue = aBuf.makeStringAndClear();
2970 SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2971 return true;
2973 aBuf.append(ch);
2974 rStream.ReadChar(ch);
2977 return false;
2980 const OString& PDFNameElement::GetValue() const
2982 return m_aValue;
2985 sal_uInt64 PDFNameElement::GetLocation() const
2987 return m_nLocation;
2990 sal_uInt64 PDFNameElement::GetLength() const
2992 return m_nLength;
2995 PDFStreamElement::PDFStreamElement(size_t nLength)
2996 : m_nLength(nLength),
2997 m_nOffset(0)
3001 bool PDFStreamElement::Read(SvStream& rStream)
3003 SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
3004 m_nOffset = rStream.Tell();
3005 std::vector<unsigned char> aBytes(m_nLength);
3006 rStream.ReadBytes(aBytes.data(), aBytes.size());
3007 m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
3009 return rStream.good();
3012 SvMemoryStream& PDFStreamElement::GetMemory()
3014 return m_aMemory;
3017 sal_uInt64 PDFStreamElement::GetOffset() const
3019 return m_nOffset;
3022 bool PDFEndStreamElement::Read(SvStream& /*rStream*/)
3024 return true;
3027 bool PDFEndObjectElement::Read(SvStream& /*rStream*/)
3029 return true;
3032 PDFArrayElement::PDFArrayElement(PDFObjectElement* pObject)
3033 : m_pObject(pObject)
3037 bool PDFArrayElement::Read(SvStream& rStream)
3039 char ch;
3040 rStream.ReadChar(ch);
3041 if (ch != '[')
3043 SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
3044 return false;
3046 m_nOffset = rStream.Tell();
3048 SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
3050 return true;
3053 void PDFArrayElement::PushBack(PDFElement* pElement)
3055 if (m_pObject)
3056 SAL_INFO("vcl.filter", "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
3057 m_aElements.push_back(pElement);
3060 const std::vector<PDFElement*>& PDFArrayElement::GetElements()
3062 return m_aElements;
3065 PDFEndArrayElement::PDFEndArrayElement() = default;
3067 bool PDFEndArrayElement::Read(SvStream& rStream)
3069 m_nOffset = rStream.Tell();
3070 char ch;
3071 rStream.ReadChar(ch);
3072 if (ch != ']')
3074 SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
3075 return false;
3078 SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
3080 return true;
3083 sal_uInt64 PDFEndArrayElement::GetOffset() const
3085 return m_nOffset;
3088 } // namespace filter
3089 } // namespace vcl
3091 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */