Version 6.1.0.2, tag libreoffice-6.1.0.2
[LibreOffice.git] / sdext / source / pdfimport / inc / pdfparse.hxx
blob90aaeab292ae8f8ad96ec8b432ded2911a0bd2c6
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX
21 #define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX
23 #include <sal/types.h>
24 #include <rtl/ustring.hxx>
25 #include <rtl/string.hxx>
27 #include <unordered_map>
28 #include <vector>
29 #include <memory>
31 namespace pdfparse
struct EmitImplData;
struct PDFContainer;

/** Abstract output sink that PDFEntry::emit() implementations write to.
 *
 *  Subclasses must implement the four pure virtual I/O primitives below.
 *  NOTE(review): "Orig" in the method names presumably refers to the bytes
 *  of the originally parsed PDF; the implementations are not visible in
 *  this header, so confirm against the concrete subclasses.
 */
class EmitContext
{
public:
    /// Writes nLen bytes starting at pBuf; the bool result reports success
    /// (presumably true == ok; confirm with implementers).
    virtual bool write( const void* pBuf, unsigned int nLen ) = 0;
    /// Returns the current position of the output.
    virtual unsigned int getCurPos() = 0;
    /// Copies nLen bytes located at nOrigOffset of the original data to the output.
    virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0;
    /// Reads up to nLen bytes located at nOrigOffset of the original data into
    /// pBuf and returns an unsigned count (presumably the bytes actually read).
    virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0;

    /// pTop is optional; how it is used is defined out of line.
    explicit EmitContext( const PDFContainer* pTop = nullptr );
    virtual ~EmitContext();

    // set this to deflate contained streams
    bool m_bDeflate;
    // set this to decrypt the PDF file
    bool m_bDecrypt;

private:
    // PDFEntry is a friend so that it can reach m_pImplData
    // (see PDFEntry::getEmitData / PDFEntry::setEmitData).
    friend struct PDFEntry;
    // Opaque implementation data; EmitImplData is only forward-declared here,
    // which is why constructor and destructor are defined out of line.
    std::unique_ptr<EmitImplData> m_pImplData;
};
57 struct PDFEntry
59 PDFEntry() {}
60 virtual ~PDFEntry();
62 virtual bool emit( EmitContext& rWriteContext ) const = 0;
63 virtual PDFEntry* clone() const = 0;
65 protected:
66 static EmitImplData* getEmitData( EmitContext const & rContext );
67 static void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData );
70 struct PDFComment : public PDFEntry
72 OString m_aComment;
74 explicit PDFComment( const OString& rComment )
75 : PDFEntry(), m_aComment( rComment ) {}
76 virtual ~PDFComment() override;
77 virtual bool emit( EmitContext& rWriteContext ) const override;
78 virtual PDFEntry* clone() const override;
81 struct PDFValue : public PDFEntry
83 // abstract base class for simple values
84 PDFValue() : PDFEntry() {}
85 virtual ~PDFValue() override;
88 struct PDFName : public PDFValue
90 OString m_aName;
92 explicit PDFName( const OString& rName )
93 : PDFValue(), m_aName( rName ) {}
94 virtual ~PDFName() override;
95 virtual bool emit( EmitContext& rWriteContext ) const override;
96 virtual PDFEntry* clone() const override;
98 OUString getFilteredName() const;
101 struct PDFString : public PDFValue
103 OString m_aString;
105 explicit PDFString( const OString& rString )
106 : PDFValue(), m_aString( rString ) {}
107 virtual ~PDFString() override;
108 virtual bool emit( EmitContext& rWriteContext ) const override;
109 virtual PDFEntry* clone() const override;
111 OString getFilteredString() const;
114 struct PDFNumber : public PDFValue
116 double m_fValue;
118 explicit PDFNumber( double fVal )
119 : PDFValue(), m_fValue( fVal ) {}
120 virtual ~PDFNumber() override;
121 virtual bool emit( EmitContext& rWriteContext ) const override;
122 virtual PDFEntry* clone() const override;
125 struct PDFBool : public PDFValue
127 bool m_bValue;
129 explicit PDFBool( bool bVal )
130 : PDFValue(), m_bValue( bVal ) {}
131 virtual ~PDFBool() override;
132 virtual bool emit( EmitContext& rWriteContext ) const override;
133 virtual PDFEntry* clone() const override;
136 struct PDFObjectRef : public PDFValue
138 unsigned int m_nNumber;
139 unsigned int m_nGeneration;
141 PDFObjectRef( unsigned int nNr, unsigned int nGen )
142 : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {}
143 virtual ~PDFObjectRef() override;
144 virtual bool emit( EmitContext& rWriteContext ) const override;
145 virtual PDFEntry* clone() const override;
148 struct PDFNull : public PDFValue
150 PDFNull() {}
151 virtual ~PDFNull() override;
152 virtual bool emit( EmitContext& rWriteContext ) const override;
153 virtual PDFEntry* clone() const override;
156 struct PDFObject;
157 struct PDFContainer : public PDFEntry
159 sal_Int32 m_nOffset;
160 std::vector<std::unique_ptr<PDFEntry>> m_aSubElements;
162 // this is an abstract base class for identifying
163 // entries that can contain sub elements besides comments
164 PDFContainer() : PDFEntry(), m_nOffset( 0 ) {}
165 virtual ~PDFContainer() override;
166 bool emitSubElements( EmitContext& rWriteContext ) const;
167 void cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const;
169 PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const;
170 PDFObject* findObject( PDFObjectRef const * pRef ) const
171 { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); }
174 struct PDFArray : public PDFContainer
176 PDFArray() {}
177 virtual ~PDFArray() override;
178 virtual bool emit( EmitContext& rWriteContext ) const override;
179 virtual PDFEntry* clone() const override;
182 struct PDFDict : public PDFContainer
184 typedef std::unordered_map<OString,PDFEntry*> Map;
185 Map m_aMap;
187 PDFDict() {}
188 virtual ~PDFDict() override;
189 virtual bool emit( EmitContext& rWriteContext ) const override;
190 virtual PDFEntry* clone() const override;
192 // inserting a value of NULL will remove rName and the previous value
193 // from the dictionary
194 void insertValue( const OString& rName, PDFEntry* pValue );
195 // removes a name/value pair from the dict
196 void eraseValue( const OString& rName );
197 // builds new map as of sub elements
198 // returns NULL if successful, else the first offending element
199 PDFEntry* buildMap();
202 struct PDFStream : public PDFEntry
204 unsigned int m_nBeginOffset;
205 unsigned int m_nEndOffset; // offset of the byte after the stream
206 PDFDict* m_pDict;
208 PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict )
209 : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {}
210 virtual ~PDFStream() override;
211 virtual bool emit( EmitContext& rWriteContext ) const override;
212 virtual PDFEntry* clone() const override;
214 unsigned int getDictLength( const PDFContainer* pObjectContainer ) const; // get contents of the "Length" entry of the dict
217 struct PDFTrailer : public PDFContainer
219 PDFDict* m_pDict;
221 PDFTrailer() : PDFContainer(), m_pDict( nullptr ) {}
222 virtual ~PDFTrailer() override;
223 virtual bool emit( EmitContext& rWriteContext ) const override;
224 virtual PDFEntry* clone() const override;
227 struct PDFFileImplData;
228 struct PDFFile : public PDFContainer
230 private:
231 mutable std::unique_ptr<PDFFileImplData> m_pData;
232 PDFFileImplData* impl_getData() const;
233 public:
234 unsigned int m_nMajor; // PDF major
235 unsigned int m_nMinor; // PDF minor
237 PDFFile();
238 virtual ~PDFFile() override;
240 virtual bool emit( EmitContext& rWriteContext ) const override;
241 virtual PDFEntry* clone() const override;
243 bool isEncrypted() const;
245 bool usesSupportedEncryptionFormat() const;
247 // this method checks whether rPwd is compatible with
248 // either user or owner password and sets up decrypt data in that case
249 // returns true if decryption can be done
250 bool setupDecryptionData( const OString& rPwd ) const;
252 bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen,
253 sal_uInt8* pOutBuffer,
254 unsigned int nObject, unsigned int nGeneration ) const;
257 struct PDFObject : public PDFContainer
259 PDFEntry* m_pObject;
260 PDFStream* m_pStream;
261 unsigned int m_nNumber;
262 unsigned int m_nGeneration;
264 PDFObject( unsigned int nNr, unsigned int nGen )
265 : m_pObject( nullptr ), m_pStream( nullptr ), m_nNumber( nNr ), m_nGeneration( nGen ) {}
266 virtual ~PDFObject() override;
267 virtual bool emit( EmitContext& rWriteContext ) const override;
268 virtual PDFEntry* clone() const override;
270 // writes only the contained stream, deflated if necessary
271 void writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const;
273 private:
274 // returns true if stream is deflated
275 // fills *ppStream and *pBytes with start of stream and count of bytes
276 // memory returned in *ppStream must be freed with rtl_freeMemory afterwards
277 // fills in NULL and 0 in case of error
278 bool getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const;
281 struct PDFPart : public PDFContainer
283 PDFPart() : PDFContainer() {}
284 virtual ~PDFPart() override;
285 virtual bool emit( EmitContext& rWriteContext ) const override;
286 virtual PDFEntry* clone() const override;
289 class PDFReader
291 public:
292 PDFReader() {}
294 static PDFEntry* read( const char* pFileName );
295 #ifdef _WIN32
296 static PDFEntry* read( const char* pBuffer, unsigned int nLen );
297 #endif
300 } // namespace
302 #endif
304 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */