/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
11 #ifndef INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
12 #define INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
18 #include <tools/stream.hxx>
19 #include <vcl/dllapi.h>
20 #include <rtl/strbuf.hxx>
22 #include <vcl/filter/pdfobjectcontainer.hxx>
// NOTE(review): this excerpt shows no enclosing namespace for the declarations
// below; confirm the intended scopes before relying on them.

// Forward declaration of a reference template; PDFDocument::Sign() takes a
// css::uno::Reference, so presumably this is its forward declaration.
template <class interface_type> class Reference;

// Forward declarations of the element types that refer to each other below.
class PDFTrailerElement;
class PDFReferenceElement;
// PDFObjectElement names PDFDocument before PDFDocument is defined.
class PDFDocument;
class PDFDictionaryElement;
class PDFArrayElement;
class PDFStreamElement;
class PDFNumberElement;
69 /// A byte range in a PDF file.
70 class VCL_DLLPUBLIC PDFElement
72 bool m_bVisiting
= false;
73 bool m_bParsing
= false;
76 PDFElement() = default;
77 virtual bool Read(SvStream
& rStream
) = 0;
78 virtual ~PDFElement() = default;
79 void setVisiting(bool bVisiting
) { m_bVisiting
= bVisiting
; }
80 bool alreadyVisiting() const { return m_bVisiting
; }
81 void setParsing(bool bParsing
) { m_bParsing
= bParsing
; }
82 bool alreadyParsing() const { return m_bParsing
; }
84 virtual void writeString(OStringBuffer
& rBuffer
) = 0;
87 /// Indirect object: something with a unique ID.
88 class VCL_DLLPUBLIC PDFObjectElement final
: public PDFElement
90 /// The document owning this element.
92 double m_fObjectValue
;
93 double m_fGenerationValue
;
94 /// If set, the object contains this number element (outside any dictionary/array).
95 PDFNumberElement
* m_pNumberElement
;
96 /// Position after the '<<' token.
97 sal_uInt64 m_nDictionaryOffset
;
98 /// Length of the dictionary buffer till (before) the '>>' token.
99 sal_uInt64 m_nDictionaryLength
;
100 PDFDictionaryElement
* m_pDictionaryElement
;
101 /// Position after the '[' token, if m_pArrayElement is set.
102 sal_uInt64 m_nArrayOffset
;
103 /// Length of the array buffer till (before) the ']' token.
104 sal_uInt64 m_nArrayLength
;
105 /// The contained direct array, if any.
106 PDFArrayElement
* m_pArrayElement
;
107 /// The stream of this object, used when this is an object stream.
108 PDFStreamElement
* m_pStreamElement
;
109 /// Objects of an object stream.
110 std::vector
<std::unique_ptr
<PDFObjectElement
>> m_aStoredElements
;
111 /// Elements of an object in an object stream.
112 std::vector
<std::unique_ptr
<PDFElement
>> m_aElements
;
113 /// Uncompressed buffer of an object in an object stream.
114 std::unique_ptr
<SvMemoryStream
> m_pStreamBuffer
;
115 /// List of all reference elements inside this object's dictionary and
116 /// nested dictionaries.
117 std::vector
<PDFReferenceElement
*> m_aDictionaryReferences
;
121 void parseIfNecessary();
124 PDFObjectElement(PDFDocument
& rDoc
, double fObjectValue
, double fGenerationValue
);
125 bool Read(SvStream
& rStream
) override
;
126 PDFElement
* Lookup(const OString
& rDictionaryKey
);
127 PDFObjectElement
* LookupObject(const OString
& rDictionaryKey
);
128 double GetObjectValue() const;
129 void SetDictionaryOffset(sal_uInt64 nDictionaryOffset
);
130 sal_uInt64
GetDictionaryOffset();
131 void SetDictionaryLength(sal_uInt64 nDictionaryLength
);
132 sal_uInt64
GetDictionaryLength();
133 PDFDictionaryElement
* GetDictionary();
134 void SetDictionary(PDFDictionaryElement
* pDictionaryElement
);
135 void SetNumberElement(PDFNumberElement
* pNumberElement
);
136 PDFNumberElement
* GetNumberElement() const;
137 /// Get access to the parsed key-value items from the object dictionary.
138 const std::map
<OString
, PDFElement
*>& GetDictionaryItems();
139 const std::vector
<PDFReferenceElement
*>& GetDictionaryReferences() const;
140 void AddDictionaryReference(PDFReferenceElement
* pReference
);
141 void SetArray(PDFArrayElement
* pArrayElement
);
142 void SetStream(PDFStreamElement
* pStreamElement
);
143 /// Access to the stream of the object, if it has any.
144 PDFStreamElement
* GetStream() const;
145 void SetArrayOffset(sal_uInt64 nArrayOffset
);
146 sal_uInt64
GetArrayOffset() const;
147 void SetArrayLength(sal_uInt64 nArrayLength
);
148 sal_uInt64
GetArrayLength() const;
149 PDFArrayElement
* GetArray();
150 /// Parse objects stored in this object stream.
151 void ParseStoredObjects();
152 std::vector
<std::unique_ptr
<PDFElement
>>& GetStoredElements();
153 SvMemoryStream
* GetStreamBuffer() const;
154 void SetStreamBuffer(std::unique_ptr
<SvMemoryStream
>& pStreamBuffer
);
155 PDFDocument
& GetDocument();
157 void writeString(OStringBuffer
& /*rBuffer*/) override
{ assert(false && "not implemented"); }
160 /// Array object: a list.
161 class VCL_DLLPUBLIC PDFArrayElement
: public PDFElement
163 std::vector
<PDFElement
*> m_aElements
;
164 /// The object that contains this array.
165 PDFObjectElement
* const m_pObject
;
168 PDFArrayElement(PDFObjectElement
* pObject
);
169 bool Read(SvStream
& rStream
) override
;
170 void PushBack(PDFElement
* pElement
);
171 const std::vector
<PDFElement
*>& GetElements() const;
172 PDFElement
* GetElement(size_t nIndex
) const { return m_aElements
[nIndex
]; }
174 void writeString(OStringBuffer
& rBuffer
) override
176 rBuffer
.append("[ ");
177 for (auto& rElement
: m_aElements
)
179 rElement
->writeString(rBuffer
);
186 /// Reference object: something with a unique ID.
187 class VCL_DLLPUBLIC PDFReferenceElement
: public PDFElement
191 int m_fGenerationValue
;
192 /// Location after the 'R' token.
193 sal_uInt64 m_nOffset
= 0;
194 /// The element providing the object number.
195 PDFNumberElement
& m_rObject
;
198 PDFReferenceElement(PDFDocument
& rDoc
, PDFNumberElement
& rObject
,
199 PDFNumberElement
const& rGeneration
);
200 bool Read(SvStream
& rStream
) override
;
201 /// Assuming the reference points to a number object, return its value.
202 double LookupNumber(SvStream
& rStream
) const;
203 /// Lookup referenced object, without assuming anything about its contents.
204 PDFObjectElement
* LookupObject();
205 int GetObjectValue() const;
206 int GetGenerationValue() const;
207 sal_uInt64
GetOffset() const;
208 PDFNumberElement
& GetObjectElement() const;
210 void writeString(OStringBuffer
& rBuffer
) override
212 rBuffer
.append(sal_Int32(GetObjectValue()));
214 rBuffer
.append(sal_Int32(GetGenerationValue()));
215 rBuffer
.append(" R");
219 /// Stream object: a byte array with a known length.
220 class VCL_DLLPUBLIC PDFStreamElement
: public PDFElement
222 size_t const m_nLength
;
223 sal_uInt64 m_nOffset
;
224 /// The byte array itself.
225 SvMemoryStream m_aMemory
;
228 explicit PDFStreamElement(size_t nLength
);
229 bool Read(SvStream
& rStream
) override
;
230 sal_uInt64
GetOffset() const;
231 SvMemoryStream
& GetMemory();
233 void writeString(OStringBuffer
& rBuffer
) override
235 rBuffer
.append("stream\n");
236 rBuffer
.append(static_cast<const char*>(m_aMemory
.GetData()), m_aMemory
.GetSize());
237 rBuffer
.append("\nendstream\n");
241 /// Name object: a key string.
242 class VCL_DLLPUBLIC PDFNameElement final
: public PDFElement
245 /// Offset after the '/' token.
246 sal_uInt64 m_nLocation
= 0;
250 bool Read(SvStream
& rStream
) override
;
251 void SetValue(const OString
& rValue
) { m_aValue
= rValue
; }
252 const OString
& GetValue() const;
253 sal_uInt64
GetLocation() const;
254 sal_uInt64
GetLength() { return m_aValue
.getLength(); }
256 void writeString(OStringBuffer
& rBuffer
) override
259 rBuffer
.append(m_aValue
);
263 /// Dictionary object: a set key-value pairs.
264 class VCL_DLLPUBLIC PDFDictionaryElement
: public PDFElement
266 /// Key-value pairs when the dictionary is a nested value.
267 std::map
<OString
, PDFElement
*> m_aItems
;
268 /// Offset after the '<<' token.
269 sal_uInt64 m_nLocation
= 0;
270 /// Position after the '/' token.
271 std::map
<OString
, sal_uInt64
> m_aDictionaryKeyOffset
;
272 /// Length of the dictionary key and value, till (before) the next token.
273 std::map
<OString
, sal_uInt64
> m_aDictionaryKeyValueLength
;
276 PDFDictionaryElement();
277 bool Read(SvStream
& rStream
) override
;
279 static PDFElement
* Lookup(const std::map
<OString
, PDFElement
*>& rDictionary
,
280 const OString
& rKey
);
281 void SetKeyOffset(const OString
& rKey
, sal_uInt64 nOffset
);
282 sal_uInt64
GetKeyOffset(const OString
& rKey
) const;
283 void SetKeyValueLength(const OString
& rKey
, sal_uInt64 nLength
);
284 sal_uInt64
GetKeyValueLength(const OString
& rKey
) const;
285 const std::map
<OString
, PDFElement
*>& GetItems() const;
286 /// Looks up an object which is only referenced in this dictionary.
287 PDFObjectElement
* LookupObject(const OString
& rDictionaryKey
);
288 /// Looks up an element which is contained in this dictionary.
289 PDFElement
* LookupElement(const OString
& rDictionaryKey
);
290 sal_uInt64
GetLocation() const { return m_nLocation
; }
291 void insert(OString
const& rKey
, PDFElement
* pPDFElement
)
293 m_aItems
.emplace(rKey
, pPDFElement
);
296 void writeString(OStringBuffer
& rBuffer
) override
298 rBuffer
.append("<< ");
299 for (auto& rPair
: m_aItems
)
302 rBuffer
.append(rPair
.first
);
304 rPair
.second
->writeString(rBuffer
);
307 rBuffer
.append(">>");
// Mode argument of PDFDocument::Tokenize(); per the comments below, each value
// names the point up to which tokenizing runs.
// NOTE(review): the enumerator names and the braces are not visible in this
// excerpt; only some of the per-enumerator comments survive.
311 enum class TokenizeMode
315 /// Till the first %%EOF token.
317 /// Till the end of the current object.
319 /// Same as END_OF_OBJECT, but for object streams (no endobj keyword).
323 /// The type column of an entry in a cross-reference stream.
// NOTE(review): the enumerator names and the braces are not visible in this
// excerpt. A NOT_COMPRESSED enumerator must exist, because the entry type's
// default member initializer uses XRefEntryType::NOT_COMPRESSED.
324 enum class XRefEntryType
326 /// xref "f" or xref stream "0".
328 /// xref "n" or xref stream "1".
334 /// An entry in a cross-reference stream.
337 XRefEntryType m_eType
= XRefEntryType::NOT_COMPRESSED
;
339 * Non-compressed: The byte offset of the object, starting from the
340 * beginning of the file.
341 * Compressed: The object number of the object stream in which this object is
344 sal_uInt64 m_nOffset
= 0;
345 /// Are changed as part of an incremental update?.
346 bool m_bDirty
= false;
351 void SetType(XRefEntryType eType
) { m_eType
= eType
; }
353 XRefEntryType
GetType() const { return m_eType
; }
355 void SetOffset(sal_uInt64 nOffset
) { m_nOffset
= nOffset
; }
357 sal_uInt64
GetOffset() const { return m_nOffset
; }
359 void SetDirty(bool bDirty
) { m_bDirty
= bDirty
; }
361 bool GetDirty() const { return m_bDirty
; }
364 /// Hex string: in <AABB> form.
365 class VCL_DLLPUBLIC PDFHexStringElement final
: public PDFElement
370 bool Read(SvStream
& rStream
) override
;
371 const OString
& GetValue() const;
373 void writeString(OStringBuffer
& rBuffer
) override
376 rBuffer
.append(m_aValue
);
381 /// Literal string: in (asdf) form.
382 class VCL_DLLPUBLIC PDFLiteralStringElement final
: public PDFElement
387 bool Read(SvStream
& rStream
) override
;
388 const OString
& GetValue() const;
390 void writeString(OStringBuffer
& rBuffer
) override
393 rBuffer
.append(m_aValue
);
398 /// Numbering object: an integer or a real.
399 class VCL_DLLPUBLIC PDFNumberElement
: public PDFElement
401 /// Input file start location.
402 sal_uInt64 m_nOffset
= 0;
403 /// Input file token length.
404 sal_uInt64 m_nLength
= 0;
409 bool Read(SvStream
& rStream
) override
;
410 double GetValue() const;
411 void SetValue(double fValue
) { m_fValue
= fValue
; }
413 sal_uInt64
GetLocation() const;
414 sal_uInt64
GetLength() const;
416 void writeString(OStringBuffer
& rBuffer
) override
{ rBuffer
.append(m_fValue
); }
419 /// A one-liner comment.
420 class VCL_DLLPUBLIC PDFCommentElement
: public PDFElement
426 explicit PDFCommentElement(PDFDocument
& rDoc
);
427 bool Read(SvStream
& rStream
) override
;
428 void writeString(OStringBuffer
& /*rBuffer*/) override
{}
431 /// End of a dictionary: '>>'.
432 class VCL_DLLPUBLIC PDFEndDictionaryElement
: public PDFElement
434 /// Offset before the '>>' token.
435 sal_uInt64 m_nLocation
= 0;
438 PDFEndDictionaryElement();
439 bool Read(SvStream
& rStream
) override
;
440 sal_uInt64
GetLocation() const;
442 void writeString(OStringBuffer
& /*rBuffer*/) override
{}
445 /// End of a stream: 'endstream' keyword.
446 class VCL_DLLPUBLIC PDFEndStreamElement
: public PDFElement
449 bool Read(SvStream
& rStream
) override
;
451 void writeString(OStringBuffer
& /*rBuffer*/) override
{}
454 /// End of an object: 'endobj' keyword.
455 class VCL_DLLPUBLIC PDFEndObjectElement
: public PDFElement
458 bool Read(SvStream
& rStream
) override
;
460 void writeString(OStringBuffer
& /*rBuffer*/) override
{}
463 /// End of an array: ']'.
464 class VCL_DLLPUBLIC PDFEndArrayElement
: public PDFElement
466 /// Location before the ']' token.
467 sal_uInt64 m_nOffset
= 0;
470 PDFEndArrayElement();
471 bool Read(SvStream
& rStream
) override
;
472 sal_uInt64
GetOffset() const;
474 void writeString(OStringBuffer
& /*rBuffer*/) override
{}
477 /// Boolean object: a 'true' or a 'false'.
478 class VCL_DLLPUBLIC PDFBooleanElement
: public PDFElement
483 explicit PDFBooleanElement(bool bValue
)
488 bool Read(SvStream
& rStream
) override
;
490 void writeString(OStringBuffer
& rBuffer
) override
492 rBuffer
.append(m_aValue
? "true" : "false");
496 /// Null object: the 'null' singleton.
497 class VCL_DLLPUBLIC PDFNullElement
: public PDFElement
500 bool Read(SvStream
& rStream
) override
;
502 void writeString(OStringBuffer
& rBuffer
) override
{ rBuffer
.append("null"); }
506 * In-memory representation of an on-disk PDF document.
508 * The PDF element list is not meant to be saved back to disk, but some
509 * elements remember their source offset / length, and based on that it's
510 * possible to modify the input file.
512 class VCL_DLLPUBLIC PDFDocument
: public PDFObjectContainer
514 /// This vector owns all elements.
515 std::vector
<std::unique_ptr
<PDFElement
>> m_aElements
;
516 /// Object ID <-> object offset map.
517 std::map
<size_t, XRefEntry
> m_aXRef
;
518 /// Object offset <-> Object pointer map.
519 std::map
<size_t, PDFObjectElement
*> m_aOffsetObjects
;
520 /// Object ID <-> Object pointer map.
521 std::map
<size_t, PDFObjectElement
*> m_aIDObjects
;
522 /// List of xref offsets we know.
523 std::vector
<size_t> m_aStartXRefs
;
524 /// Offsets of trailers, from latest to oldest.
525 std::vector
<size_t> m_aTrailerOffsets
;
526 /// Trailer offset <-> Trailer pointer map.
527 std::map
<size_t, PDFTrailerElement
*> m_aOffsetTrailers
;
528 /// List of EOF offsets we know.
529 std::vector
<size_t> m_aEOFs
;
530 PDFTrailerElement
* m_pTrailer
= nullptr;
531 /// When m_pTrailer is nullptr, this can still have a dictionary.
532 PDFObjectElement
* m_pXRefStream
= nullptr;
533 /// All editing takes place in this buffer, if it happens.
534 SvMemoryStream m_aEditBuffer
;
536 /// Signature line in PDF format, to be consumed by the next Sign() invocation.
537 std::vector
<sal_Int8
> m_aSignatureLine
;
539 /// 0-based page number where m_aSignatureLine should be placed.
540 size_t m_nSignaturePage
= 0;
542 /// Suggest a minimal, yet free signature ID to use for the next signature.
543 sal_uInt32
GetNextSignature();
544 /// Write the signature object as part of signing.
545 sal_Int32
WriteSignatureObject(const OUString
& rDescription
, bool bAdES
,
546 sal_uInt64
& rLastByteRangeOffset
, sal_Int64
& rContentOffset
);
547 /// Write the appearance object as part of signing.
548 sal_Int32
WriteAppearanceObject(tools::Rectangle
& rSignatureRectangle
);
549 /// Write the annot object as part of signing.
550 sal_Int32
WriteAnnotObject(PDFObjectElement
const& rFirstPage
, sal_Int32 nSignatureId
,
551 sal_Int32 nAppearanceId
,
552 const tools::Rectangle
& rSignatureRectangle
);
553 /// Write the updated Page object as part of signing.
554 bool WritePageObject(PDFObjectElement
& rFirstPage
, sal_Int32 nAnnotId
);
555 /// Write the updated Catalog object as part of signing.
556 bool WriteCatalogObject(sal_Int32 nAnnotId
, PDFReferenceElement
*& pRoot
);
557 /// Write the updated cross-references as part of signing.
558 void WriteXRef(sal_uInt64 nXRefOffset
, PDFReferenceElement
const* pRoot
);
562 virtual ~PDFDocument();
563 PDFDocument
& operator=(const PDFDocument
&) = delete;
564 PDFDocument(const PDFDocument
&) = delete;
565 /// @name Low-level functions, to be used by PDFElement subclasses.
567 /// Decode a hex dump.
568 static std::vector
<unsigned char> DecodeHexString(PDFHexStringElement
const* pElement
);
569 static OString
ReadKeyword(SvStream
& rStream
);
570 static size_t FindStartXRef(SvStream
& rStream
);
571 void ReadXRef(SvStream
& rStream
);
572 void ReadXRefStream(SvStream
& rStream
);
573 static void SkipWhitespace(SvStream
& rStream
);
574 /// Instead of all whitespace, just skip CR and NL characters.
575 static void SkipLineBreaks(SvStream
& rStream
);
576 size_t GetObjectOffset(size_t nIndex
) const;
577 const std::vector
<std::unique_ptr
<PDFElement
>>& GetElements() const;
578 std::vector
<PDFObjectElement
*> GetPages();
579 PDFObjectElement
* GetCatalog();
580 /// Remember the end location of an EOF token.
581 void PushBackEOF(size_t nOffset
);
582 /// Look up object based on object number, possibly by parsing object streams.
583 PDFObjectElement
* LookupObject(size_t nObjectNumber
);
584 /// Access to the input document, even after the input stream is gone.
585 SvMemoryStream
& GetEditBuffer();
586 /// Tokenize elements from current offset.
587 bool Tokenize(SvStream
& rStream
, TokenizeMode eMode
,
588 std::vector
<std::unique_ptr
<PDFElement
>>& rElements
,
589 PDFObjectElement
* pObjectElement
);
590 /// Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID.
591 void SetIDObject(size_t nID
, PDFObjectElement
* pObject
);
594 /// @name High-level functions, to be used by others.
596 /// Read elements from the start of the stream till its end.
597 bool Read(SvStream
& rStream
);
598 void SetSignatureLine(const std::vector
<sal_Int8
>& rSignatureLine
);
599 void SetSignaturePage(size_t nPage
);
600 /// Sign the read document with xCertificate in the edit buffer.
601 bool Sign(const css::uno::Reference
<css::security::XCertificate
>& xCertificate
,
602 const OUString
& rDescription
, bool bAdES
);
603 /// Serializes the contents of the edit buffer.
604 bool Write(SvStream
& rStream
);
605 /// Get a list of signatures embedded into this document.
606 std::vector
<PDFObjectElement
*> GetSignatureWidgets();
608 * Get the value of the "modification detection and prevention" permission:
609 * Valid values are 1, 2 and 3: only 3 allows annotations after signing.
612 /// Remove the nth signature from read document in the edit buffer.
613 bool RemoveSignature(size_t nPosition
);
614 /// Get byte offsets of the end of incremental updates.
615 const std::vector
<size_t>& GetEOFs() const;
618 /// See vcl::PDFObjectContainer::createObject().
619 sal_Int32
createObject() override
;
620 /// See vcl::PDFObjectContainer::updateObject().
621 bool updateObject(sal_Int32 n
) override
;
622 /// See vcl::PDFObjectContainer::writeBuffer().
623 bool writeBuffer(const void* pBuffer
, sal_uInt64 nBytes
) override
;
626 /// The trailer singleton is at the end of the doc.
627 class VCL_DLLPUBLIC PDFTrailerElement
: public PDFElement
630 PDFDictionaryElement
* m_pDictionaryElement
;
631 /// Location of the end of the trailer token.
632 sal_uInt64 m_nOffset
= 0;
635 explicit PDFTrailerElement(PDFDocument
& rDoc
);
636 bool Read(SvStream
& rStream
) override
;
637 PDFElement
* Lookup(const OString
& rDictionaryKey
);
638 sal_uInt64
GetLocation() const;
640 void SetDictionary(PDFDictionaryElement
* pDictionaryElement
)
642 m_pDictionaryElement
= pDictionaryElement
;
645 PDFDictionaryElement
* GetDictionary() { return m_pDictionaryElement
; }
647 void writeString(OStringBuffer
& /*rBuffer*/) override
{ assert(false && "not implemented"); }
650 class VCL_DLLPUBLIC PDFObjectParser final
652 const std::vector
<std::unique_ptr
<PDFElement
>>& mrElements
;
655 PDFObjectParser(std::vector
<std::unique_ptr
<PDFElement
>> const& rElements
)
656 : mrElements(rElements
)
660 size_t parse(PDFElement
* pParsingElement
, size_t nStartIndex
= 0, int nCurrentDepth
= 0);
664 } // namespace xmlsecurity
666 #endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
668 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */