// Update ooo320-m1
// [ooovba.git] / sdext / source / pdfimport / inc / pdfparse.hxx
// blob4ea411ffcdbce833f9db2f2a778a96c744543e75
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: pdfparse.hxx,v $
11 * $Revision: 1.2 $
13 * This file is part of OpenOffice.org.
15 * OpenOffice.org is free software: you can redistribute it and/or modify
16 * it under the terms of the GNU Lesser General Public License version 3
17 * only, as published by the Free Software Foundation.
19 * OpenOffice.org is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Lesser General Public License version 3 for more details
23 * (a copy is included in the LICENSE file that accompanied this code).
25 * You should have received a copy of the GNU Lesser General Public License
26 * version 3 along with OpenOffice.org. If not, see
27 * <http://www.openoffice.org/license.html>
28 * for a copy of the LGPLv3 License.
30 ************************************************************************/
32 #ifndef INCLUDED_PDFI_PDFPARSE_HXX
33 #define INCLUDED_PDFI_PDFPARSE_HXX
35 #include <sal/types.h>
36 #include <rtl/ustring.hxx>
37 #include <rtl/string.hxx>
39 #include <vector>
40 #include <hash_map>
42 namespace pdfparse
45 struct EmitImplData;
46 struct PDFContainer;
47 class EmitContext
49 public:
50 virtual bool write( const void* pBuf, unsigned int nLen ) = 0;
51 virtual unsigned int getCurPos() = 0;
52 virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0;
53 virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0;
55 EmitContext( const PDFContainer* pTop = NULL );
56 virtual ~EmitContext();
58 // set this to deflate contained streams
59 bool m_bDeflate;
60 // set this to decrypt the PDF file
61 bool m_bDecrypt;
63 private:
64 friend struct PDFEntry;
65 EmitImplData* m_pImplData;
68 struct PDFEntry
70 PDFEntry() {}
71 virtual ~PDFEntry();
73 virtual bool emit( EmitContext& rWriteContext ) const = 0;
74 virtual PDFEntry* clone() const = 0;
76 protected:
77 EmitImplData* getEmitData( EmitContext& rContext ) const;
78 void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData ) const;
81 struct PDFComment : public PDFEntry
83 rtl::OString m_aComment;
85 PDFComment( const rtl::OString& rComment )
86 : PDFEntry(), m_aComment( rComment ) {}
87 virtual ~PDFComment();
88 virtual bool emit( EmitContext& rWriteContext ) const;
89 virtual PDFEntry* clone() const;
92 struct PDFValue : public PDFEntry
94 // abstract base class for simple values
95 PDFValue() : PDFEntry() {}
96 virtual ~PDFValue();
99 struct PDFName : public PDFValue
101 rtl::OString m_aName;
103 PDFName( const rtl::OString& rName )
104 : PDFValue(), m_aName( rName ) {}
105 virtual ~PDFName();
106 virtual bool emit( EmitContext& rWriteContext ) const;
107 virtual PDFEntry* clone() const;
109 rtl::OUString getFilteredName() const;
112 struct PDFString : public PDFValue
114 rtl::OString m_aString;
116 PDFString( const rtl::OString& rString )
117 : PDFValue(), m_aString( rString ) {}
118 virtual ~PDFString();
119 virtual bool emit( EmitContext& rWriteContext ) const;
120 virtual PDFEntry* clone() const;
122 rtl::OString getFilteredString() const;
125 struct PDFNumber : public PDFValue
127 double m_fValue;
129 PDFNumber( double fVal )
130 : PDFValue(), m_fValue( fVal ) {}
131 virtual ~PDFNumber();
132 virtual bool emit( EmitContext& rWriteContext ) const;
133 virtual PDFEntry* clone() const;
136 struct PDFBool : public PDFValue
138 bool m_bValue;
140 PDFBool( bool bVal )
141 : PDFValue(), m_bValue( bVal ) {}
142 virtual ~PDFBool();
143 virtual bool emit( EmitContext& rWriteContext ) const;
144 virtual PDFEntry* clone() const;
147 struct PDFObjectRef : public PDFValue
149 unsigned int m_nNumber;
150 unsigned int m_nGeneration;
152 PDFObjectRef( unsigned int nNr, unsigned int nGen )
153 : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {}
154 virtual ~PDFObjectRef();
155 virtual bool emit( EmitContext& rWriteContext ) const;
156 virtual PDFEntry* clone() const;
159 struct PDFNull : public PDFValue
161 PDFNull() {}
162 virtual ~PDFNull();
163 virtual bool emit( EmitContext& rWriteContext ) const;
164 virtual PDFEntry* clone() const;
167 struct PDFObject;
168 struct PDFContainer : public PDFEntry
170 sal_Int32 m_nOffset;
171 std::vector<PDFEntry*> m_aSubElements;
173 // this is an abstract base class for identifying
174 // entries that can contain sub elements besides comments
175 PDFContainer() : PDFEntry(), m_nOffset( 0 ) {}
176 virtual ~PDFContainer();
177 virtual bool emitSubElements( EmitContext& rWriteContext ) const;
178 virtual void cloneSubElements( std::vector<PDFEntry*>& rNewSubElements ) const;
180 PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const;
181 PDFObject* findObject( PDFObjectRef* pRef ) const
182 { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); }
185 struct PDFArray : public PDFContainer
187 PDFArray() {}
188 virtual ~PDFArray();
189 virtual bool emit( EmitContext& rWriteContext ) const;
190 virtual PDFEntry* clone() const;
193 struct PDFDict : public PDFContainer
195 typedef std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash> Map;
196 Map m_aMap;
198 PDFDict() {}
199 virtual ~PDFDict();
200 virtual bool emit( EmitContext& rWriteContext ) const;
201 virtual PDFEntry* clone() const;
203 // inserting a value of NULL will remove rName and the previous value
204 // from the dictionary
205 void insertValue( const rtl::OString& rName, PDFEntry* pValue );
206 // removes a name/value pair from the dict
207 void eraseValue( const rtl::OString& rName );
208 // builds new map as of sub elements
209 // returns NULL if successfull, else the first offending element
210 PDFEntry* buildMap();
213 struct PDFStream : public PDFEntry
215 unsigned int m_nBeginOffset;
216 unsigned int m_nEndOffset; // offset of the byte after the stream
217 PDFDict* m_pDict;
219 PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict )
220 : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {}
221 virtual ~PDFStream();
222 virtual bool emit( EmitContext& rWriteContext ) const;
223 virtual PDFEntry* clone() const;
225 unsigned int getDictLength( const PDFContainer* pObjectContainer = NULL ) const; // get contents of the "Length" entry of the dict
228 struct PDFTrailer : public PDFContainer
230 PDFDict* m_pDict;
232 PDFTrailer() : PDFContainer(), m_pDict( NULL ) {}
233 virtual ~PDFTrailer();
234 virtual bool emit( EmitContext& rWriteContext ) const;
235 virtual PDFEntry* clone() const;
238 struct PDFFileImplData;
239 struct PDFFile : public PDFContainer
241 private:
242 mutable PDFFileImplData* m_pData;
243 PDFFileImplData* impl_getData() const;
244 public:
245 unsigned int m_nMajor; // PDF major
246 unsigned int m_nMinor; // PDF minor
248 PDFFile()
249 : PDFContainer(),
250 m_pData( NULL ),
251 m_nMajor( 0 ), m_nMinor( 0 )
253 virtual ~PDFFile();
255 virtual bool emit( EmitContext& rWriteContext ) const;
256 virtual PDFEntry* clone() const;
258 bool isEncrypted() const;
259 // this method checks whether rPwd is compatible with
260 // either user or owner password and sets up decrypt data in that case
261 // returns true if decryption can be done
262 bool setupDecryptionData( const rtl::OString& rPwd ) const;
264 bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen,
265 sal_uInt8* pOutBuffer,
266 unsigned int nObject, unsigned int nGeneration ) const;
269 struct PDFObject : public PDFContainer
271 PDFEntry* m_pObject;
272 PDFStream* m_pStream;
273 unsigned int m_nNumber;
274 unsigned int m_nGeneration;
276 PDFObject( unsigned int nNr, unsigned int nGen )
277 : m_pObject( NULL ), m_pStream( NULL ), m_nNumber( nNr ), m_nGeneration( nGen ) {}
278 virtual ~PDFObject();
279 virtual bool emit( EmitContext& rWriteContext ) const;
280 virtual PDFEntry* clone() const;
282 // writes only the contained stream, deflated if necessary
283 bool writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const;
285 private:
286 // returns true if stream is deflated
287 // fills *ppStream and *pBytes with start of stream and count of bytes
288 // memory returned in *ppStream must be freed with rtl_freeMemory afterwards
289 // fills in NULL and 0 in case of error
290 bool getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const;
293 struct PDFPart : public PDFContainer
295 PDFPart() : PDFContainer() {}
296 virtual ~PDFPart();
297 virtual bool emit( EmitContext& rWriteContext ) const;
298 virtual PDFEntry* clone() const;
301 class PDFReader
303 public:
304 PDFReader() {}
305 ~PDFReader() {}
307 PDFEntry* read( const char* pFileName );
308 PDFEntry* read( const char* pBuffer, unsigned int nLen );
311 } // namespace
313 #endif