Bump version to 21.06.18.1
[LibreOffice.git] / include / vcl / filter / pdfdocument.hxx
blob7f7cc8dfb64198f6bea2fd26aeb60cb5df807090
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 */
11 #ifndef INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
12 #define INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
14 #include <memory>
15 #include <map>
16 #include <vector>
18 #include <tools/stream.hxx>
19 #include <vcl/dllapi.h>
20 #include <rtl/strbuf.hxx>
22 #include <vcl/filter/pdfobjectcontainer.hxx>
// Forward declarations of external types that this header only mentions in
// function signatures taking them by reference, so their full definitions
// are not required here.
namespace com::sun::star
{
namespace security
{
class XCertificate;
}
namespace uno
{
template <class interface_type> class Reference;
}
}

namespace tools
{
class Rectangle;
}
39 namespace vcl::filter
// Forward declarations (alphabetical): these classes are referred to by
// pointer or reference before their definitions appear later in this header.
class PDFArrayElement;
class PDFDictionaryElement;
class PDFDocument;
class PDFNumberElement;
class PDFReferenceElement;
class PDFStreamElement;
class PDFTrailerElement;
49 /// A byte range in a PDF file.
50 class VCL_DLLPUBLIC PDFElement
52 bool m_bVisiting = false;
53 bool m_bParsing = false;
55 public:
56 PDFElement() = default;
57 virtual bool Read(SvStream& rStream) = 0;
58 virtual ~PDFElement() = default;
59 void setVisiting(bool bVisiting) { m_bVisiting = bVisiting; }
60 bool alreadyVisiting() const { return m_bVisiting; }
61 void setParsing(bool bParsing) { m_bParsing = bParsing; }
62 bool alreadyParsing() const { return m_bParsing; }
64 virtual void writeString(OStringBuffer& rBuffer) = 0;
67 /// Indirect object: something with a unique ID.
68 class VCL_DLLPUBLIC PDFObjectElement final : public PDFElement
70 /// The document owning this element.
71 PDFDocument& m_rDoc;
72 double m_fObjectValue;
73 double m_fGenerationValue;
74 /// If set, the object contains this number element (outside any dictionary/array).
75 PDFNumberElement* m_pNumberElement;
76 /// Position after the '<<' token.
77 sal_uInt64 m_nDictionaryOffset;
78 /// Length of the dictionary buffer till (before) the '>>' token.
79 sal_uInt64 m_nDictionaryLength;
80 PDFDictionaryElement* m_pDictionaryElement;
81 /// Position after the '[' token, if m_pArrayElement is set.
82 sal_uInt64 m_nArrayOffset;
83 /// Length of the array buffer till (before) the ']' token.
84 sal_uInt64 m_nArrayLength;
85 /// The contained direct array, if any.
86 PDFArrayElement* m_pArrayElement;
87 /// The stream of this object, used when this is an object stream.
88 PDFStreamElement* m_pStreamElement;
89 /// Objects of an object stream.
90 std::vector<std::unique_ptr<PDFObjectElement>> m_aStoredElements;
91 /// Elements of an object in an object stream.
92 std::vector<std::unique_ptr<PDFElement>> m_aElements;
93 /// Uncompressed buffer of an object in an object stream.
94 std::unique_ptr<SvMemoryStream> m_pStreamBuffer;
95 /// List of all reference elements inside this object's dictionary and
96 /// nested dictionaries.
97 std::vector<PDFReferenceElement*> m_aDictionaryReferences;
99 bool m_bParsed;
101 void parseIfNecessary();
103 public:
104 PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue);
105 bool Read(SvStream& rStream) override;
106 PDFElement* Lookup(const OString& rDictionaryKey);
107 PDFObjectElement* LookupObject(const OString& rDictionaryKey);
108 double GetObjectValue() const;
109 void SetDictionaryOffset(sal_uInt64 nDictionaryOffset);
110 sal_uInt64 GetDictionaryOffset();
111 void SetDictionaryLength(sal_uInt64 nDictionaryLength);
112 sal_uInt64 GetDictionaryLength();
113 PDFDictionaryElement* GetDictionary();
114 void SetDictionary(PDFDictionaryElement* pDictionaryElement);
115 void SetNumberElement(PDFNumberElement* pNumberElement);
116 PDFNumberElement* GetNumberElement() const;
117 /// Get access to the parsed key-value items from the object dictionary.
118 const std::map<OString, PDFElement*>& GetDictionaryItems();
119 const std::vector<PDFReferenceElement*>& GetDictionaryReferences() const;
120 void AddDictionaryReference(PDFReferenceElement* pReference);
121 void SetArray(PDFArrayElement* pArrayElement);
122 void SetStream(PDFStreamElement* pStreamElement);
123 /// Access to the stream of the object, if it has any.
124 PDFStreamElement* GetStream() const;
125 void SetArrayOffset(sal_uInt64 nArrayOffset);
126 sal_uInt64 GetArrayOffset() const;
127 void SetArrayLength(sal_uInt64 nArrayLength);
128 sal_uInt64 GetArrayLength() const;
129 PDFArrayElement* GetArray();
130 /// Parse objects stored in this object stream.
131 void ParseStoredObjects();
132 std::vector<std::unique_ptr<PDFElement>>& GetStoredElements();
133 SvMemoryStream* GetStreamBuffer() const;
134 void SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer);
135 PDFDocument& GetDocument();
137 void writeString(OStringBuffer& /*rBuffer*/) override { assert(false && "not implemented"); }
140 /// Array object: a list.
141 class VCL_DLLPUBLIC PDFArrayElement : public PDFElement
143 std::vector<PDFElement*> m_aElements;
144 /// The object that contains this array.
145 PDFObjectElement* m_pObject;
147 public:
148 PDFArrayElement(PDFObjectElement* pObject);
149 bool Read(SvStream& rStream) override;
150 void PushBack(PDFElement* pElement);
151 const std::vector<PDFElement*>& GetElements() const;
152 PDFElement* GetElement(size_t nIndex) const { return m_aElements[nIndex]; }
154 void writeString(OStringBuffer& rBuffer) override
156 rBuffer.append("[ ");
157 for (auto& rElement : m_aElements)
159 rElement->writeString(rBuffer);
160 rBuffer.append(" ");
162 rBuffer.append("]");
166 /// Reference object: something with a unique ID.
167 class VCL_DLLPUBLIC PDFReferenceElement : public PDFElement
169 PDFDocument& m_rDoc;
170 int m_fObjectValue;
171 int m_fGenerationValue;
172 /// Location after the 'R' token.
173 sal_uInt64 m_nOffset = 0;
174 /// The element providing the object number.
175 PDFNumberElement& m_rObject;
177 public:
178 PDFReferenceElement(PDFDocument& rDoc, PDFNumberElement& rObject,
179 PDFNumberElement const& rGeneration);
180 bool Read(SvStream& rStream) override;
181 /// Assuming the reference points to a number object, return its value.
182 double LookupNumber(SvStream& rStream) const;
183 /// Lookup referenced object, without assuming anything about its contents.
184 PDFObjectElement* LookupObject();
185 int GetObjectValue() const;
186 int GetGenerationValue() const;
187 sal_uInt64 GetOffset() const;
188 PDFNumberElement& GetObjectElement() const;
190 void writeString(OStringBuffer& rBuffer) override
192 rBuffer.append(sal_Int32(GetObjectValue()));
193 rBuffer.append(' ');
194 rBuffer.append(sal_Int32(GetGenerationValue()));
195 rBuffer.append(" R");
199 /// Stream object: a byte array with a known length.
200 class VCL_DLLPUBLIC PDFStreamElement : public PDFElement
202 size_t m_nLength;
203 sal_uInt64 m_nOffset;
204 /// The byte array itself.
205 SvMemoryStream m_aMemory;
207 public:
208 explicit PDFStreamElement(size_t nLength);
209 bool Read(SvStream& rStream) override;
210 sal_uInt64 GetOffset() const;
211 SvMemoryStream& GetMemory();
213 void writeString(OStringBuffer& rBuffer) override
215 rBuffer.append("stream\n");
216 rBuffer.append(static_cast<const char*>(m_aMemory.GetData()), m_aMemory.GetSize());
217 rBuffer.append("\nendstream\n");
221 /// Name object: a key string.
222 class VCL_DLLPUBLIC PDFNameElement final : public PDFElement
224 OString m_aValue;
225 /// Offset after the '/' token.
226 sal_uInt64 m_nLocation = 0;
228 public:
229 PDFNameElement();
230 bool Read(SvStream& rStream) override;
231 void SetValue(const OString& rValue) { m_aValue = rValue; }
232 const OString& GetValue() const;
233 sal_uInt64 GetLocation() const;
234 sal_uInt64 GetLength() { return m_aValue.getLength(); }
236 void writeString(OStringBuffer& rBuffer) override
238 rBuffer.append("/");
239 rBuffer.append(m_aValue);
243 /// Dictionary object: a set key-value pairs.
244 class VCL_DLLPUBLIC PDFDictionaryElement : public PDFElement
246 /// Key-value pairs when the dictionary is a nested value.
247 std::map<OString, PDFElement*> m_aItems;
248 /// Offset after the '<<' token.
249 sal_uInt64 m_nLocation = 0;
250 /// Position after the '/' token.
251 std::map<OString, sal_uInt64> m_aDictionaryKeyOffset;
252 /// Length of the dictionary key and value, till (before) the next token.
253 std::map<OString, sal_uInt64> m_aDictionaryKeyValueLength;
255 public:
256 PDFDictionaryElement();
257 bool Read(SvStream& rStream) override;
259 static PDFElement* Lookup(const std::map<OString, PDFElement*>& rDictionary,
260 const OString& rKey);
261 void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset);
262 sal_uInt64 GetKeyOffset(const OString& rKey) const;
263 void SetKeyValueLength(const OString& rKey, sal_uInt64 nLength);
264 sal_uInt64 GetKeyValueLength(const OString& rKey) const;
265 const std::map<OString, PDFElement*>& GetItems() const;
266 /// Looks up an object which is only referenced in this dictionary.
267 PDFObjectElement* LookupObject(const OString& rDictionaryKey);
268 /// Looks up an element which is contained in this dictionary.
269 PDFElement* LookupElement(const OString& rDictionaryKey);
270 sal_uInt64 GetLocation() const { return m_nLocation; }
271 void insert(OString const& rKey, PDFElement* pPDFElement)
273 m_aItems.emplace(rKey, pPDFElement);
276 void writeString(OStringBuffer& rBuffer) override
278 rBuffer.append("<< ");
279 for (auto& rPair : m_aItems)
281 rBuffer.append("/");
282 rBuffer.append(rPair.first);
283 rBuffer.append(" ");
284 rPair.second->writeString(rBuffer);
285 rBuffer.append(" ");
287 rBuffer.append(">>");
/// Selects where tokenizing stops.
enum class TokenizeMode
{
    /// Tokenize the full file.
    END_OF_STREAM,
    /// Tokenize till the first %%EOF token.
    EOF_TOKEN,
    /// Tokenize till the end of the current object.
    END_OF_OBJECT,
    /// Same as END_OF_OBJECT, but for object streams (no endobj keyword).
    STORED_OBJECT
};
/// The type column of an entry in a cross-reference stream.
enum class XRefEntryType
{
    /// Free entry: xref "f" or xref stream "0".
    FREE,
    /// Uncompressed entry: xref "n" or xref stream "1".
    NOT_COMPRESSED,
    /// Compressed entry: xref stream "2".
    COMPRESSED
};
314 /// An entry in a cross-reference stream.
315 class XRefEntry
317 XRefEntryType m_eType = XRefEntryType::NOT_COMPRESSED;
319 * Non-compressed: The byte offset of the object, starting from the
320 * beginning of the file.
321 * Compressed: The object number of the object stream in which this object is
322 * stored.
324 sal_uInt64 m_nOffset = 0;
325 /// Are changed as part of an incremental update?.
326 bool m_bDirty = false;
328 public:
329 XRefEntry();
331 void SetType(XRefEntryType eType) { m_eType = eType; }
333 XRefEntryType GetType() const { return m_eType; }
335 void SetOffset(sal_uInt64 nOffset) { m_nOffset = nOffset; }
337 sal_uInt64 GetOffset() const { return m_nOffset; }
339 void SetDirty(bool bDirty) { m_bDirty = bDirty; }
341 bool GetDirty() const { return m_bDirty; }
344 /// Hex string: in <AABB> form.
345 class VCL_DLLPUBLIC PDFHexStringElement final : public PDFElement
347 OString m_aValue;
349 public:
350 bool Read(SvStream& rStream) override;
351 const OString& GetValue() const;
353 void writeString(OStringBuffer& rBuffer) override
355 rBuffer.append("<");
356 rBuffer.append(m_aValue);
357 rBuffer.append(">");
361 /// Literal string: in (asdf) form.
362 class VCL_DLLPUBLIC PDFLiteralStringElement final : public PDFElement
364 OString m_aValue;
366 public:
367 bool Read(SvStream& rStream) override;
368 const OString& GetValue() const;
370 void writeString(OStringBuffer& rBuffer) override
372 rBuffer.append("(");
373 rBuffer.append(m_aValue);
374 rBuffer.append(")");
378 /// Numbering object: an integer or a real.
379 class VCL_DLLPUBLIC PDFNumberElement : public PDFElement
381 /// Input file start location.
382 sal_uInt64 m_nOffset = 0;
383 /// Input file token length.
384 sal_uInt64 m_nLength = 0;
385 double m_fValue = 0;
387 public:
388 PDFNumberElement();
389 bool Read(SvStream& rStream) override;
390 double GetValue() const;
391 void SetValue(double fValue) { m_fValue = fValue; }
393 sal_uInt64 GetLocation() const;
394 sal_uInt64 GetLength() const;
396 void writeString(OStringBuffer& rBuffer) override { rBuffer.append(m_fValue); }
399 /// A one-liner comment.
400 class VCL_DLLPUBLIC PDFCommentElement : public PDFElement
402 PDFDocument& m_rDoc;
403 OString m_aComment;
405 public:
406 explicit PDFCommentElement(PDFDocument& rDoc);
407 bool Read(SvStream& rStream) override;
408 void writeString(OStringBuffer& /*rBuffer*/) override {}
411 /// End of a dictionary: '>>'.
412 class VCL_DLLPUBLIC PDFEndDictionaryElement : public PDFElement
414 /// Offset before the '>>' token.
415 sal_uInt64 m_nLocation = 0;
417 public:
418 PDFEndDictionaryElement();
419 bool Read(SvStream& rStream) override;
420 sal_uInt64 GetLocation() const;
422 void writeString(OStringBuffer& /*rBuffer*/) override {}
425 /// End of a stream: 'endstream' keyword.
426 class VCL_DLLPUBLIC PDFEndStreamElement : public PDFElement
428 public:
429 bool Read(SvStream& rStream) override;
431 void writeString(OStringBuffer& /*rBuffer*/) override {}
434 /// End of an object: 'endobj' keyword.
435 class VCL_DLLPUBLIC PDFEndObjectElement : public PDFElement
437 public:
438 bool Read(SvStream& rStream) override;
440 void writeString(OStringBuffer& /*rBuffer*/) override {}
443 /// End of an array: ']'.
444 class VCL_DLLPUBLIC PDFEndArrayElement : public PDFElement
446 /// Location before the ']' token.
447 sal_uInt64 m_nOffset = 0;
449 public:
450 PDFEndArrayElement();
451 bool Read(SvStream& rStream) override;
452 sal_uInt64 GetOffset() const;
454 void writeString(OStringBuffer& /*rBuffer*/) override {}
457 /// Boolean object: a 'true' or a 'false'.
458 class VCL_DLLPUBLIC PDFBooleanElement : public PDFElement
460 bool m_aValue;
462 public:
463 explicit PDFBooleanElement(bool bValue)
464 : m_aValue(bValue)
468 bool Read(SvStream& rStream) override;
470 void writeString(OStringBuffer& rBuffer) override
472 rBuffer.append(m_aValue ? "true" : "false");
476 /// Null object: the 'null' singleton.
477 class VCL_DLLPUBLIC PDFNullElement : public PDFElement
479 public:
480 bool Read(SvStream& rStream) override;
482 void writeString(OStringBuffer& rBuffer) override { rBuffer.append("null"); }
486 * In-memory representation of an on-disk PDF document.
488 * The PDF element list is not meant to be saved back to disk, but some
489 * elements remember their source offset / length, and based on that it's
490 * possible to modify the input file.
492 class VCL_DLLPUBLIC PDFDocument : public PDFObjectContainer
494 /// This vector owns all elements.
495 std::vector<std::unique_ptr<PDFElement>> m_aElements;
496 /// Object ID <-> object offset map.
497 std::map<size_t, XRefEntry> m_aXRef;
498 /// Object offset <-> Object pointer map.
499 std::map<size_t, PDFObjectElement*> m_aOffsetObjects;
500 /// Object ID <-> Object pointer map.
501 std::map<size_t, PDFObjectElement*> m_aIDObjects;
502 /// List of xref offsets we know.
503 std::vector<size_t> m_aStartXRefs;
504 /// Offsets of trailers, from latest to oldest.
505 std::vector<size_t> m_aTrailerOffsets;
506 /// Trailer offset <-> Trailer pointer map.
507 std::map<size_t, PDFTrailerElement*> m_aOffsetTrailers;
508 /// List of EOF offsets we know.
509 std::vector<size_t> m_aEOFs;
510 PDFTrailerElement* m_pTrailer = nullptr;
511 /// When m_pTrailer is nullptr, this can still have a dictionary.
512 PDFObjectElement* m_pXRefStream = nullptr;
513 /// All editing takes place in this buffer, if it happens.
514 SvMemoryStream m_aEditBuffer;
516 /// Signature line in PDF format, to be consumed by the next Sign() invocation.
517 std::vector<sal_Int8> m_aSignatureLine;
519 /// 0-based page number where m_aSignatureLine should be placed.
520 size_t m_nSignaturePage = 0;
522 /// Suggest a minimal, yet free signature ID to use for the next signature.
523 sal_uInt32 GetNextSignature();
524 /// Write the signature object as part of signing.
525 sal_Int32 WriteSignatureObject(const OUString& rDescription, bool bAdES,
526 sal_uInt64& rLastByteRangeOffset, sal_Int64& rContentOffset);
527 /// Write the appearance object as part of signing.
528 sal_Int32 WriteAppearanceObject(tools::Rectangle& rSignatureRectangle);
529 /// Write the annot object as part of signing.
530 sal_Int32 WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
531 sal_Int32 nAppearanceId,
532 const tools::Rectangle& rSignatureRectangle);
533 /// Write the updated Page object as part of signing.
534 bool WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId);
535 /// Write the updated Catalog object as part of signing.
536 bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot);
537 /// Write the updated cross-references as part of signing.
538 void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot);
540 public:
541 PDFDocument();
542 virtual ~PDFDocument();
543 PDFDocument& operator=(const PDFDocument&) = delete;
544 PDFDocument(const PDFDocument&) = delete;
545 /// @name Low-level functions, to be used by PDFElement subclasses.
546 //@{
547 /// Decode a hex dump.
548 static std::vector<unsigned char> DecodeHexString(PDFHexStringElement const* pElement);
549 static OString ReadKeyword(SvStream& rStream);
550 static size_t FindStartXRef(SvStream& rStream);
551 void ReadXRef(SvStream& rStream);
552 void ReadXRefStream(SvStream& rStream);
553 static void SkipWhitespace(SvStream& rStream);
554 /// Instead of all whitespace, just skip CR and NL characters.
555 static void SkipLineBreaks(SvStream& rStream);
556 size_t GetObjectOffset(size_t nIndex) const;
557 const std::vector<std::unique_ptr<PDFElement>>& GetElements() const;
558 std::vector<PDFObjectElement*> GetPages();
559 PDFObjectElement* GetCatalog();
560 /// Remember the end location of an EOF token.
561 void PushBackEOF(size_t nOffset);
562 /// Look up object based on object number, possibly by parsing object streams.
563 PDFObjectElement* LookupObject(size_t nObjectNumber);
564 /// Access to the input document, even after the input stream is gone.
565 SvMemoryStream& GetEditBuffer();
566 /// Tokenize elements from current offset.
567 bool Tokenize(SvStream& rStream, TokenizeMode eMode,
568 std::vector<std::unique_ptr<PDFElement>>& rElements,
569 PDFObjectElement* pObjectElement);
570 /// Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID.
571 void SetIDObject(size_t nID, PDFObjectElement* pObject);
572 //@}
574 /// @name High-level functions, to be used by others.
575 //@{
576 /// Read elements from the start of the stream till its end.
577 bool Read(SvStream& rStream);
578 void SetSignatureLine(const std::vector<sal_Int8>& rSignatureLine);
579 void SetSignaturePage(size_t nPage);
580 /// Sign the read document with xCertificate in the edit buffer.
581 bool Sign(const css::uno::Reference<css::security::XCertificate>& xCertificate,
582 const OUString& rDescription, bool bAdES);
583 /// Serializes the contents of the edit buffer.
584 bool Write(SvStream& rStream);
585 /// Get a list of signatures embedded into this document.
586 std::vector<PDFObjectElement*> GetSignatureWidgets();
587 /// Remove the nth signature from read document in the edit buffer.
588 bool RemoveSignature(size_t nPosition);
589 //@}
591 /// See vcl::PDFObjectContainer::createObject().
592 sal_Int32 createObject() override;
593 /// See vcl::PDFObjectContainer::updateObject().
594 bool updateObject(sal_Int32 n) override;
595 /// See vcl::PDFObjectContainer::writeBuffer().
596 bool writeBuffer(const void* pBuffer, sal_uInt64 nBytes) override;
599 /// The trailer singleton is at the end of the doc.
600 class VCL_DLLPUBLIC PDFTrailerElement : public PDFElement
602 PDFDocument& m_rDoc;
603 PDFDictionaryElement* m_pDictionaryElement;
604 /// Location of the end of the trailer token.
605 sal_uInt64 m_nOffset = 0;
607 public:
608 explicit PDFTrailerElement(PDFDocument& rDoc);
609 bool Read(SvStream& rStream) override;
610 PDFElement* Lookup(const OString& rDictionaryKey);
611 sal_uInt64 GetLocation() const;
613 void SetDictionary(PDFDictionaryElement* pDictionaryElement)
615 m_pDictionaryElement = pDictionaryElement;
618 PDFDictionaryElement* GetDictionary() { return m_pDictionaryElement; }
620 void writeString(OStringBuffer& /*rBuffer*/) override { assert(false && "not implemented"); }
623 class VCL_DLLPUBLIC PDFObjectParser final
625 const std::vector<std::unique_ptr<PDFElement>>& mrElements;
627 public:
628 PDFObjectParser(std::vector<std::unique_ptr<PDFElement>> const& rElements)
629 : mrElements(rElements)
633 size_t parse(PDFElement* pParsingElement, size_t nStartIndex = 0, int nCurrentDepth = 0);
636 } // namespace vcl::filter
638 #endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
640 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */