// Source: LibreOffice.git, branch libreoffice-5-0-4
// Path:   sdext/source/pdfimport/inc/pdfparse.hxx
// Blob:   192f606a7d3c698a46ae8249b376041a5b3b3e4f
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX
21 #define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX
23 #include <sal/types.h>
24 #include <rtl/ustring.hxx>
25 #include <rtl/string.hxx>
27 #include <unordered_map>
28 #include <vector>
30 namespace pdfparse
33 struct EmitImplData;
34 struct PDFContainer;
35 class EmitContext
37 public:
38 virtual bool write( const void* pBuf, unsigned int nLen ) = 0;
39 virtual unsigned int getCurPos() = 0;
40 virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0;
41 virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0;
43 EmitContext( const PDFContainer* pTop = NULL );
44 virtual ~EmitContext();
46 // set this to deflate contained streams
47 bool m_bDeflate;
48 // set this to decrypt the PDF file
49 bool m_bDecrypt;
51 private:
52 friend struct PDFEntry;
53 EmitImplData* m_pImplData;
56 struct PDFEntry
58 PDFEntry() {}
59 virtual ~PDFEntry();
61 virtual bool emit( EmitContext& rWriteContext ) const = 0;
62 virtual PDFEntry* clone() const = 0;
64 protected:
65 static EmitImplData* getEmitData( EmitContext& rContext );
66 static void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData );
69 struct PDFComment : public PDFEntry
71 OString m_aComment;
73 PDFComment( const OString& rComment )
74 : PDFEntry(), m_aComment( rComment ) {}
75 virtual ~PDFComment();
76 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
77 virtual PDFEntry* clone() const SAL_OVERRIDE;
80 struct PDFValue : public PDFEntry
82 // abstract base class for simple values
83 PDFValue() : PDFEntry() {}
84 virtual ~PDFValue();
87 struct PDFName : public PDFValue
89 OString m_aName;
91 PDFName( const OString& rName )
92 : PDFValue(), m_aName( rName ) {}
93 virtual ~PDFName();
94 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
95 virtual PDFEntry* clone() const SAL_OVERRIDE;
97 OUString getFilteredName() const;
100 struct PDFString : public PDFValue
102 OString m_aString;
104 PDFString( const OString& rString )
105 : PDFValue(), m_aString( rString ) {}
106 virtual ~PDFString();
107 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
108 virtual PDFEntry* clone() const SAL_OVERRIDE;
110 OString getFilteredString() const;
113 struct PDFNumber : public PDFValue
115 double m_fValue;
117 PDFNumber( double fVal )
118 : PDFValue(), m_fValue( fVal ) {}
119 virtual ~PDFNumber();
120 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
121 virtual PDFEntry* clone() const SAL_OVERRIDE;
124 struct PDFBool : public PDFValue
126 bool m_bValue;
128 PDFBool( bool bVal )
129 : PDFValue(), m_bValue( bVal ) {}
130 virtual ~PDFBool();
131 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
132 virtual PDFEntry* clone() const SAL_OVERRIDE;
135 struct PDFObjectRef : public PDFValue
137 unsigned int m_nNumber;
138 unsigned int m_nGeneration;
140 PDFObjectRef( unsigned int nNr, unsigned int nGen )
141 : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {}
142 virtual ~PDFObjectRef();
143 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
144 virtual PDFEntry* clone() const SAL_OVERRIDE;
147 struct PDFNull : public PDFValue
149 PDFNull() {}
150 virtual ~PDFNull();
151 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
152 virtual PDFEntry* clone() const SAL_OVERRIDE;
155 struct PDFObject;
156 struct PDFContainer : public PDFEntry
158 sal_Int32 m_nOffset;
159 std::vector<PDFEntry*> m_aSubElements;
161 // this is an abstract base class for identifying
162 // entries that can contain sub elements besides comments
163 PDFContainer() : PDFEntry(), m_nOffset( 0 ) {}
164 virtual ~PDFContainer();
165 bool emitSubElements( EmitContext& rWriteContext ) const;
166 void cloneSubElements( std::vector<PDFEntry*>& rNewSubElements ) const;
168 PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const;
169 PDFObject* findObject( PDFObjectRef* pRef ) const
170 { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); }
173 struct PDFArray : public PDFContainer
175 PDFArray() {}
176 virtual ~PDFArray();
177 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
178 virtual PDFEntry* clone() const SAL_OVERRIDE;
181 struct PDFDict : public PDFContainer
183 typedef std::unordered_map<OString,PDFEntry*,OStringHash> Map;
184 Map m_aMap;
186 PDFDict() {}
187 virtual ~PDFDict();
188 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
189 virtual PDFEntry* clone() const SAL_OVERRIDE;
191 // inserting a value of NULL will remove rName and the previous value
192 // from the dictionary
193 void insertValue( const OString& rName, PDFEntry* pValue );
194 // removes a name/value pair from the dict
195 void eraseValue( const OString& rName );
196 // builds new map as of sub elements
197 // returns NULL if successful, else the first offending element
198 PDFEntry* buildMap();
201 struct PDFStream : public PDFEntry
203 unsigned int m_nBeginOffset;
204 unsigned int m_nEndOffset; // offset of the byte after the stream
205 PDFDict* m_pDict;
207 PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict )
208 : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {}
209 virtual ~PDFStream();
210 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
211 virtual PDFEntry* clone() const SAL_OVERRIDE;
213 unsigned int getDictLength( const PDFContainer* pObjectContainer = NULL ) const; // get contents of the "Length" entry of the dict
216 struct PDFTrailer : public PDFContainer
218 PDFDict* m_pDict;
220 PDFTrailer() : PDFContainer(), m_pDict( NULL ) {}
221 virtual ~PDFTrailer();
222 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
223 virtual PDFEntry* clone() const SAL_OVERRIDE;
226 struct PDFFileImplData;
227 struct PDFFile : public PDFContainer
229 private:
230 mutable PDFFileImplData* m_pData;
231 PDFFileImplData* impl_getData() const;
232 public:
233 unsigned int m_nMajor; // PDF major
234 unsigned int m_nMinor; // PDF minor
236 PDFFile()
237 : PDFContainer(),
238 m_pData( NULL ),
239 m_nMajor( 0 ), m_nMinor( 0 )
241 virtual ~PDFFile();
243 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
244 virtual PDFEntry* clone() const SAL_OVERRIDE;
246 bool isEncrypted() const;
248 bool usesSupportedEncryptionFormat() const;
250 // this method checks whether rPwd is compatible with
251 // either user or owner password and sets up decrypt data in that case
252 // returns true if decryption can be done
253 bool setupDecryptionData( const OString& rPwd ) const;
255 bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen,
256 sal_uInt8* pOutBuffer,
257 unsigned int nObject, unsigned int nGeneration ) const;
259 OUString getDecryptionKey() const;
262 struct PDFObject : public PDFContainer
264 PDFEntry* m_pObject;
265 PDFStream* m_pStream;
266 unsigned int m_nNumber;
267 unsigned int m_nGeneration;
269 PDFObject( unsigned int nNr, unsigned int nGen )
270 : m_pObject( NULL ), m_pStream( NULL ), m_nNumber( nNr ), m_nGeneration( nGen ) {}
271 virtual ~PDFObject();
272 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
273 virtual PDFEntry* clone() const SAL_OVERRIDE;
275 // writes only the contained stream, deflated if necessary
276 bool writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const;
278 private:
279 // returns true if stream is deflated
280 // fills *ppStream and *pBytes with start of stream and count of bytes
281 // memory returned in *ppStream must be freed with rtl_freeMemory afterwards
282 // fills in NULL and 0 in case of error
283 bool getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const;
286 struct PDFPart : public PDFContainer
288 PDFPart() : PDFContainer() {}
289 virtual ~PDFPart();
290 virtual bool emit( EmitContext& rWriteContext ) const SAL_OVERRIDE;
291 virtual PDFEntry* clone() const SAL_OVERRIDE;
294 class PDFReader
296 public:
297 PDFReader() {}
298 ~PDFReader() {}
300 static PDFEntry* read( const char* pFileName );
301 #ifdef WIN32
302 static PDFEntry* read( const char* pBuffer, unsigned int nLen );
303 #endif
306 } // namespace
308 #endif
310 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */