xmlsecurity: handle MDP permission during PDF verify
[LibreOffice.git] / include / vcl / filter / pdfdocument.hxx
blob5bb2a87482eb558efe555133d60af15eee2f2c8b
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 */
11 #ifndef INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
12 #define INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
14 #include <memory>
15 #include <map>
16 #include <vector>
18 #include <tools/stream.hxx>
19 #include <vcl/dllapi.h>
20 #include <rtl/strbuf.hxx>
22 #include <vcl/filter/pdfobjectcontainer.hxx>
24 namespace com
26 namespace sun
28 namespace star
30 namespace security
32 class XCertificate;
38 namespace com
40 namespace sun
42 namespace star
44 namespace uno
46 template <class interface_type> class Reference;
52 namespace tools
54 class Rectangle;
57 namespace vcl
59 namespace filter
61 class PDFTrailerElement;
62 class PDFReferenceElement;
63 class PDFDocument;
64 class PDFDictionaryElement;
65 class PDFArrayElement;
66 class PDFStreamElement;
67 class PDFNumberElement;
69 /// A byte range in a PDF file.
70 class VCL_DLLPUBLIC PDFElement
72 bool m_bVisiting = false;
73 bool m_bParsing = false;
75 public:
76 PDFElement() = default;
77 virtual bool Read(SvStream& rStream) = 0;
78 virtual ~PDFElement() = default;
79 void setVisiting(bool bVisiting) { m_bVisiting = bVisiting; }
80 bool alreadyVisiting() const { return m_bVisiting; }
81 void setParsing(bool bParsing) { m_bParsing = bParsing; }
82 bool alreadyParsing() const { return m_bParsing; }
84 virtual void writeString(OStringBuffer& rBuffer) = 0;
87 /// Indirect object: something with a unique ID.
88 class VCL_DLLPUBLIC PDFObjectElement final : public PDFElement
90 /// The document owning this element.
91 PDFDocument& m_rDoc;
92 double m_fObjectValue;
93 double m_fGenerationValue;
94 /// If set, the object contains this number element (outside any dictionary/array).
95 PDFNumberElement* m_pNumberElement;
96 /// Position after the '<<' token.
97 sal_uInt64 m_nDictionaryOffset;
98 /// Length of the dictionary buffer till (before) the '>>' token.
99 sal_uInt64 m_nDictionaryLength;
100 PDFDictionaryElement* m_pDictionaryElement;
101 /// Position after the '[' token, if m_pArrayElement is set.
102 sal_uInt64 m_nArrayOffset;
103 /// Length of the array buffer till (before) the ']' token.
104 sal_uInt64 m_nArrayLength;
105 /// The contained direct array, if any.
106 PDFArrayElement* m_pArrayElement;
107 /// The stream of this object, used when this is an object stream.
108 PDFStreamElement* m_pStreamElement;
109 /// Objects of an object stream.
110 std::vector<std::unique_ptr<PDFObjectElement>> m_aStoredElements;
111 /// Elements of an object in an object stream.
112 std::vector<std::unique_ptr<PDFElement>> m_aElements;
113 /// Uncompressed buffer of an object in an object stream.
114 std::unique_ptr<SvMemoryStream> m_pStreamBuffer;
115 /// List of all reference elements inside this object's dictionary and
116 /// nested dictionaries.
117 std::vector<PDFReferenceElement*> m_aDictionaryReferences;
119 bool m_bParsed;
121 void parseIfNecessary();
123 public:
124 PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue);
125 bool Read(SvStream& rStream) override;
126 PDFElement* Lookup(const OString& rDictionaryKey);
127 PDFObjectElement* LookupObject(const OString& rDictionaryKey);
128 double GetObjectValue() const;
129 void SetDictionaryOffset(sal_uInt64 nDictionaryOffset);
130 sal_uInt64 GetDictionaryOffset();
131 void SetDictionaryLength(sal_uInt64 nDictionaryLength);
132 sal_uInt64 GetDictionaryLength();
133 PDFDictionaryElement* GetDictionary();
134 void SetDictionary(PDFDictionaryElement* pDictionaryElement);
135 void SetNumberElement(PDFNumberElement* pNumberElement);
136 PDFNumberElement* GetNumberElement() const;
137 /// Get access to the parsed key-value items from the object dictionary.
138 const std::map<OString, PDFElement*>& GetDictionaryItems();
139 const std::vector<PDFReferenceElement*>& GetDictionaryReferences() const;
140 void AddDictionaryReference(PDFReferenceElement* pReference);
141 void SetArray(PDFArrayElement* pArrayElement);
142 void SetStream(PDFStreamElement* pStreamElement);
143 /// Access to the stream of the object, if it has any.
144 PDFStreamElement* GetStream() const;
145 void SetArrayOffset(sal_uInt64 nArrayOffset);
146 sal_uInt64 GetArrayOffset() const;
147 void SetArrayLength(sal_uInt64 nArrayLength);
148 sal_uInt64 GetArrayLength() const;
149 PDFArrayElement* GetArray();
150 /// Parse objects stored in this object stream.
151 void ParseStoredObjects();
152 std::vector<std::unique_ptr<PDFElement>>& GetStoredElements();
153 SvMemoryStream* GetStreamBuffer() const;
154 void SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer);
155 PDFDocument& GetDocument();
157 void writeString(OStringBuffer& /*rBuffer*/) override { assert(false && "not implemented"); }
160 /// Array object: a list.
161 class VCL_DLLPUBLIC PDFArrayElement : public PDFElement
163 std::vector<PDFElement*> m_aElements;
164 /// The object that contains this array.
165 PDFObjectElement* const m_pObject;
167 public:
168 PDFArrayElement(PDFObjectElement* pObject);
169 bool Read(SvStream& rStream) override;
170 void PushBack(PDFElement* pElement);
171 const std::vector<PDFElement*>& GetElements() const;
172 PDFElement* GetElement(size_t nIndex) const { return m_aElements[nIndex]; }
174 void writeString(OStringBuffer& rBuffer) override
176 rBuffer.append("[ ");
177 for (auto& rElement : m_aElements)
179 rElement->writeString(rBuffer);
180 rBuffer.append(" ");
182 rBuffer.append("]");
186 /// Reference object: something with a unique ID.
187 class VCL_DLLPUBLIC PDFReferenceElement : public PDFElement
189 PDFDocument& m_rDoc;
190 int m_fObjectValue;
191 int m_fGenerationValue;
192 /// Location after the 'R' token.
193 sal_uInt64 m_nOffset = 0;
194 /// The element providing the object number.
195 PDFNumberElement& m_rObject;
197 public:
198 PDFReferenceElement(PDFDocument& rDoc, PDFNumberElement& rObject,
199 PDFNumberElement const& rGeneration);
200 bool Read(SvStream& rStream) override;
201 /// Assuming the reference points to a number object, return its value.
202 double LookupNumber(SvStream& rStream) const;
203 /// Lookup referenced object, without assuming anything about its contents.
204 PDFObjectElement* LookupObject();
205 int GetObjectValue() const;
206 int GetGenerationValue() const;
207 sal_uInt64 GetOffset() const;
208 PDFNumberElement& GetObjectElement() const;
210 void writeString(OStringBuffer& rBuffer) override
212 rBuffer.append(sal_Int32(GetObjectValue()));
213 rBuffer.append(' ');
214 rBuffer.append(sal_Int32(GetGenerationValue()));
215 rBuffer.append(" R");
219 /// Stream object: a byte array with a known length.
220 class VCL_DLLPUBLIC PDFStreamElement : public PDFElement
222 size_t const m_nLength;
223 sal_uInt64 m_nOffset;
224 /// The byte array itself.
225 SvMemoryStream m_aMemory;
227 public:
228 explicit PDFStreamElement(size_t nLength);
229 bool Read(SvStream& rStream) override;
230 sal_uInt64 GetOffset() const;
231 SvMemoryStream& GetMemory();
233 void writeString(OStringBuffer& rBuffer) override
235 rBuffer.append("stream\n");
236 rBuffer.append(static_cast<const char*>(m_aMemory.GetData()), m_aMemory.GetSize());
237 rBuffer.append("\nendstream\n");
241 /// Name object: a key string.
242 class VCL_DLLPUBLIC PDFNameElement final : public PDFElement
244 OString m_aValue;
245 /// Offset after the '/' token.
246 sal_uInt64 m_nLocation = 0;
248 public:
249 PDFNameElement();
250 bool Read(SvStream& rStream) override;
251 void SetValue(const OString& rValue) { m_aValue = rValue; }
252 const OString& GetValue() const;
253 sal_uInt64 GetLocation() const;
254 sal_uInt64 GetLength() { return m_aValue.getLength(); }
256 void writeString(OStringBuffer& rBuffer) override
258 rBuffer.append("/");
259 rBuffer.append(m_aValue);
263 /// Dictionary object: a set key-value pairs.
264 class VCL_DLLPUBLIC PDFDictionaryElement : public PDFElement
266 /// Key-value pairs when the dictionary is a nested value.
267 std::map<OString, PDFElement*> m_aItems;
268 /// Offset after the '<<' token.
269 sal_uInt64 m_nLocation = 0;
270 /// Position after the '/' token.
271 std::map<OString, sal_uInt64> m_aDictionaryKeyOffset;
272 /// Length of the dictionary key and value, till (before) the next token.
273 std::map<OString, sal_uInt64> m_aDictionaryKeyValueLength;
275 public:
276 PDFDictionaryElement();
277 bool Read(SvStream& rStream) override;
279 static PDFElement* Lookup(const std::map<OString, PDFElement*>& rDictionary,
280 const OString& rKey);
281 void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset);
282 sal_uInt64 GetKeyOffset(const OString& rKey) const;
283 void SetKeyValueLength(const OString& rKey, sal_uInt64 nLength);
284 sal_uInt64 GetKeyValueLength(const OString& rKey) const;
285 const std::map<OString, PDFElement*>& GetItems() const;
286 /// Looks up an object which is only referenced in this dictionary.
287 PDFObjectElement* LookupObject(const OString& rDictionaryKey);
288 /// Looks up an element which is contained in this dictionary.
289 PDFElement* LookupElement(const OString& rDictionaryKey);
290 sal_uInt64 GetLocation() const { return m_nLocation; }
291 void insert(OString const& rKey, PDFElement* pPDFElement)
293 m_aItems.emplace(rKey, pPDFElement);
296 void writeString(OStringBuffer& rBuffer) override
298 rBuffer.append("<< ");
299 for (auto& rPair : m_aItems)
301 rBuffer.append("/");
302 rBuffer.append(rPair.first);
303 rBuffer.append(" ");
304 rPair.second->writeString(rBuffer);
305 rBuffer.append(" ");
307 rBuffer.append(">>");
/// How far a tokenizer run should read.
enum class TokenizeMode
{
    /// Full file.
    END_OF_STREAM,
    /// Till the first %%EOF token.
    EOF_TOKEN,
    /// Till the end of the current object.
    END_OF_OBJECT,
    /// Same as END_OF_OBJECT, but for object streams (no endobj keyword).
    STORED_OBJECT
};
/// The type column of an entry in a cross-reference stream.
enum class XRefEntryType
{
    /// xref "f" or xref stream "0".
    FREE,
    /// xref "n" or xref stream "1".
    NOT_COMPRESSED,
    /// xref stream "2".
    COMPRESSED
};
334 /// An entry in a cross-reference stream.
335 class XRefEntry
337 XRefEntryType m_eType = XRefEntryType::NOT_COMPRESSED;
339 * Non-compressed: The byte offset of the object, starting from the
340 * beginning of the file.
341 * Compressed: The object number of the object stream in which this object is
342 * stored.
344 sal_uInt64 m_nOffset = 0;
345 /// Are changed as part of an incremental update?.
346 bool m_bDirty = false;
348 public:
349 XRefEntry();
351 void SetType(XRefEntryType eType) { m_eType = eType; }
353 XRefEntryType GetType() const { return m_eType; }
355 void SetOffset(sal_uInt64 nOffset) { m_nOffset = nOffset; }
357 sal_uInt64 GetOffset() const { return m_nOffset; }
359 void SetDirty(bool bDirty) { m_bDirty = bDirty; }
361 bool GetDirty() const { return m_bDirty; }
364 /// Hex string: in <AABB> form.
365 class VCL_DLLPUBLIC PDFHexStringElement final : public PDFElement
367 OString m_aValue;
369 public:
370 bool Read(SvStream& rStream) override;
371 const OString& GetValue() const;
373 void writeString(OStringBuffer& rBuffer) override
375 rBuffer.append("<");
376 rBuffer.append(m_aValue);
377 rBuffer.append(">");
381 /// Literal string: in (asdf) form.
382 class VCL_DLLPUBLIC PDFLiteralStringElement final : public PDFElement
384 OString m_aValue;
386 public:
387 bool Read(SvStream& rStream) override;
388 const OString& GetValue() const;
390 void writeString(OStringBuffer& rBuffer) override
392 rBuffer.append("(");
393 rBuffer.append(m_aValue);
394 rBuffer.append(")");
398 /// Numbering object: an integer or a real.
399 class VCL_DLLPUBLIC PDFNumberElement : public PDFElement
401 /// Input file start location.
402 sal_uInt64 m_nOffset = 0;
403 /// Input file token length.
404 sal_uInt64 m_nLength = 0;
405 double m_fValue = 0;
407 public:
408 PDFNumberElement();
409 bool Read(SvStream& rStream) override;
410 double GetValue() const;
411 void SetValue(double fValue) { m_fValue = fValue; }
413 sal_uInt64 GetLocation() const;
414 sal_uInt64 GetLength() const;
416 void writeString(OStringBuffer& rBuffer) override { rBuffer.append(m_fValue); }
419 /// A one-liner comment.
420 class VCL_DLLPUBLIC PDFCommentElement : public PDFElement
422 PDFDocument& m_rDoc;
423 OString m_aComment;
425 public:
426 explicit PDFCommentElement(PDFDocument& rDoc);
427 bool Read(SvStream& rStream) override;
428 void writeString(OStringBuffer& /*rBuffer*/) override {}
431 /// End of a dictionary: '>>'.
432 class VCL_DLLPUBLIC PDFEndDictionaryElement : public PDFElement
434 /// Offset before the '>>' token.
435 sal_uInt64 m_nLocation = 0;
437 public:
438 PDFEndDictionaryElement();
439 bool Read(SvStream& rStream) override;
440 sal_uInt64 GetLocation() const;
442 void writeString(OStringBuffer& /*rBuffer*/) override {}
445 /// End of a stream: 'endstream' keyword.
446 class VCL_DLLPUBLIC PDFEndStreamElement : public PDFElement
448 public:
449 bool Read(SvStream& rStream) override;
451 void writeString(OStringBuffer& /*rBuffer*/) override {}
454 /// End of an object: 'endobj' keyword.
455 class VCL_DLLPUBLIC PDFEndObjectElement : public PDFElement
457 public:
458 bool Read(SvStream& rStream) override;
460 void writeString(OStringBuffer& /*rBuffer*/) override {}
463 /// End of an array: ']'.
464 class VCL_DLLPUBLIC PDFEndArrayElement : public PDFElement
466 /// Location before the ']' token.
467 sal_uInt64 m_nOffset = 0;
469 public:
470 PDFEndArrayElement();
471 bool Read(SvStream& rStream) override;
472 sal_uInt64 GetOffset() const;
474 void writeString(OStringBuffer& /*rBuffer*/) override {}
477 /// Boolean object: a 'true' or a 'false'.
478 class VCL_DLLPUBLIC PDFBooleanElement : public PDFElement
480 bool m_aValue;
482 public:
483 explicit PDFBooleanElement(bool bValue)
484 : m_aValue(bValue)
488 bool Read(SvStream& rStream) override;
490 void writeString(OStringBuffer& rBuffer) override
492 rBuffer.append(m_aValue ? "true" : "false");
496 /// Null object: the 'null' singleton.
497 class VCL_DLLPUBLIC PDFNullElement : public PDFElement
499 public:
500 bool Read(SvStream& rStream) override;
502 void writeString(OStringBuffer& rBuffer) override { rBuffer.append("null"); }
506 * In-memory representation of an on-disk PDF document.
508 * The PDF element list is not meant to be saved back to disk, but some
509 * elements remember their source offset / length, and based on that it's
510 * possible to modify the input file.
512 class VCL_DLLPUBLIC PDFDocument : public PDFObjectContainer
514 /// This vector owns all elements.
515 std::vector<std::unique_ptr<PDFElement>> m_aElements;
516 /// Object ID <-> object offset map.
517 std::map<size_t, XRefEntry> m_aXRef;
518 /// Object offset <-> Object pointer map.
519 std::map<size_t, PDFObjectElement*> m_aOffsetObjects;
520 /// Object ID <-> Object pointer map.
521 std::map<size_t, PDFObjectElement*> m_aIDObjects;
522 /// List of xref offsets we know.
523 std::vector<size_t> m_aStartXRefs;
524 /// Offsets of trailers, from latest to oldest.
525 std::vector<size_t> m_aTrailerOffsets;
526 /// Trailer offset <-> Trailer pointer map.
527 std::map<size_t, PDFTrailerElement*> m_aOffsetTrailers;
528 /// List of EOF offsets we know.
529 std::vector<size_t> m_aEOFs;
530 PDFTrailerElement* m_pTrailer = nullptr;
531 /// When m_pTrailer is nullptr, this can still have a dictionary.
532 PDFObjectElement* m_pXRefStream = nullptr;
533 /// All editing takes place in this buffer, if it happens.
534 SvMemoryStream m_aEditBuffer;
536 /// Signature line in PDF format, to be consumed by the next Sign() invocation.
537 std::vector<sal_Int8> m_aSignatureLine;
539 /// 0-based page number where m_aSignatureLine should be placed.
540 size_t m_nSignaturePage = 0;
542 /// Suggest a minimal, yet free signature ID to use for the next signature.
543 sal_uInt32 GetNextSignature();
544 /// Write the signature object as part of signing.
545 sal_Int32 WriteSignatureObject(const OUString& rDescription, bool bAdES,
546 sal_uInt64& rLastByteRangeOffset, sal_Int64& rContentOffset);
547 /// Write the appearance object as part of signing.
548 sal_Int32 WriteAppearanceObject(tools::Rectangle& rSignatureRectangle);
549 /// Write the annot object as part of signing.
550 sal_Int32 WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
551 sal_Int32 nAppearanceId,
552 const tools::Rectangle& rSignatureRectangle);
553 /// Write the updated Page object as part of signing.
554 bool WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId);
555 /// Write the updated Catalog object as part of signing.
556 bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot);
557 /// Write the updated cross-references as part of signing.
558 void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot);
560 public:
561 PDFDocument();
562 virtual ~PDFDocument();
563 PDFDocument& operator=(const PDFDocument&) = delete;
564 PDFDocument(const PDFDocument&) = delete;
565 /// @name Low-level functions, to be used by PDFElement subclasses.
566 //@{
567 /// Decode a hex dump.
568 static std::vector<unsigned char> DecodeHexString(PDFHexStringElement const* pElement);
569 static OString ReadKeyword(SvStream& rStream);
570 static size_t FindStartXRef(SvStream& rStream);
571 void ReadXRef(SvStream& rStream);
572 void ReadXRefStream(SvStream& rStream);
573 static void SkipWhitespace(SvStream& rStream);
574 /// Instead of all whitespace, just skip CR and NL characters.
575 static void SkipLineBreaks(SvStream& rStream);
576 size_t GetObjectOffset(size_t nIndex) const;
577 const std::vector<std::unique_ptr<PDFElement>>& GetElements() const;
578 std::vector<PDFObjectElement*> GetPages();
579 PDFObjectElement* GetCatalog();
580 /// Remember the end location of an EOF token.
581 void PushBackEOF(size_t nOffset);
582 /// Look up object based on object number, possibly by parsing object streams.
583 PDFObjectElement* LookupObject(size_t nObjectNumber);
584 /// Access to the input document, even after the input stream is gone.
585 SvMemoryStream& GetEditBuffer();
586 /// Tokenize elements from current offset.
587 bool Tokenize(SvStream& rStream, TokenizeMode eMode,
588 std::vector<std::unique_ptr<PDFElement>>& rElements,
589 PDFObjectElement* pObjectElement);
590 /// Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID.
591 void SetIDObject(size_t nID, PDFObjectElement* pObject);
592 //@}
594 /// @name High-level functions, to be used by others.
595 //@{
596 /// Read elements from the start of the stream till its end.
597 bool Read(SvStream& rStream);
598 void SetSignatureLine(const std::vector<sal_Int8>& rSignatureLine);
599 void SetSignaturePage(size_t nPage);
600 /// Sign the read document with xCertificate in the edit buffer.
601 bool Sign(const css::uno::Reference<css::security::XCertificate>& xCertificate,
602 const OUString& rDescription, bool bAdES);
603 /// Serializes the contents of the edit buffer.
604 bool Write(SvStream& rStream);
605 /// Get a list of signatures embedded into this document.
606 std::vector<PDFObjectElement*> GetSignatureWidgets();
608 * Get the value of the "modification detection and prevention" permission:
609 * Valid values are 1, 2 and 3: only 3 allows annotations after signing.
611 int GetMDPPerm();
612 /// Remove the nth signature from read document in the edit buffer.
613 bool RemoveSignature(size_t nPosition);
614 /// Get byte offsets of the end of incremental updates.
615 const std::vector<size_t>& GetEOFs() const;
616 //@}
618 /// See vcl::PDFObjectContainer::createObject().
619 sal_Int32 createObject() override;
620 /// See vcl::PDFObjectContainer::updateObject().
621 bool updateObject(sal_Int32 n) override;
622 /// See vcl::PDFObjectContainer::writeBuffer().
623 bool writeBuffer(const void* pBuffer, sal_uInt64 nBytes) override;
626 /// The trailer singleton is at the end of the doc.
627 class VCL_DLLPUBLIC PDFTrailerElement : public PDFElement
629 PDFDocument& m_rDoc;
630 PDFDictionaryElement* m_pDictionaryElement;
631 /// Location of the end of the trailer token.
632 sal_uInt64 m_nOffset = 0;
634 public:
635 explicit PDFTrailerElement(PDFDocument& rDoc);
636 bool Read(SvStream& rStream) override;
637 PDFElement* Lookup(const OString& rDictionaryKey);
638 sal_uInt64 GetLocation() const;
640 void SetDictionary(PDFDictionaryElement* pDictionaryElement)
642 m_pDictionaryElement = pDictionaryElement;
645 PDFDictionaryElement* GetDictionary() { return m_pDictionaryElement; }
647 void writeString(OStringBuffer& /*rBuffer*/) override { assert(false && "not implemented"); }
650 class VCL_DLLPUBLIC PDFObjectParser final
652 const std::vector<std::unique_ptr<PDFElement>>& mrElements;
654 public:
655 PDFObjectParser(std::vector<std::unique_ptr<PDFElement>> const& rElements)
656 : mrElements(rElements)
660 size_t parse(PDFElement* pParsingElement, size_t nStartIndex = 0, int nCurrentDepth = 0);
} // namespace filter
} // namespace vcl
666 #endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
668 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */