update dev300-m58
[ooovba.git] / sax / inc / xml2utf.hxx
blobeeb502e9d374c1031a929879acec742fc76b8456
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: xml2utf.hxx,v $
10 * $Revision: 1.3.10.1 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // TODO: Where from?
// Function-like maximum / minimum macros.
// Each argument appears twice in the expansion, so an argument expression with
// side effects (e.g. `i++`) may be evaluated twice. All uses of the arguments
// and the whole expansion are parenthesized.
32 #define Max( a, b ) (((a)>(b)) ? (a) : (b) )
33 #define Min( a, b ) (((a)<(b)) ? (a) : (b) )
37 * Text2UnicodeConverter
39 **/
40 namespace sax_expatwrap {
// Declaration of a converter from a byte sequence (Sequence<sal_Int8>) to a
// sequence of sal_Unicode code units. Only the interface is declared here; the
// member functions other than canContinue() are defined elsewhere.
// NOTE(review): a destructor is declared but no copy constructor or copy
// assignment is visible in this view -- confirm whether copying is safe.
42 class Text2UnicodeConverter
45 public:
// Constructs the converter from an encoding given as a string.
// NOTE(review): the constructor is not `explicit`, so an OString converts
// implicitly to a Text2UnicodeConverter.
46 Text2UnicodeConverter( const ::rtl::OString & sEncoding );
47 ~Text2UnicodeConverter();
// Converts the given bytes and returns the result as sal_Unicode units.
49 ::com::sun::star::uno::Sequence < sal_Unicode > convert( const ::com::sun::star::uno::Sequence<sal_Int8> & );
// Returns the current value of m_bCanContinue (not const-qualified).
50 sal_Bool canContinue() { return m_bCanContinue; }
52 private:
// Private initialization helper taking the encoding as rtl_TextEncoding.
53 void init( rtl_TextEncoding encoding );
// Converter and context handles.
// NOTE(review): presumably created in init() and released in the destructor -- confirm.
55 rtl_TextToUnicodeConverter m_convText2Unicode;
56 rtl_TextToUnicodeContext m_contextText2Unicode;
// Value reported by canContinue().
57 sal_Bool m_bCanContinue;
58 sal_Bool m_bInitialized;
59 rtl_TextEncoding m_rtlEncoding;
// NOTE(review): presumably buffers unconverted input between convert() calls -- confirm.
60 ::com::sun::star::uno::Sequence<sal_Int8> m_seqSource;
63 /*----------------------------------------
65 * Unicode2TextConverter
67 **-----------------------------------------*/
// Declaration of a converter from sal_Unicode code units to a byte sequence
// (Sequence<sal_Int8>). Only the interface is declared here, apart from the
// inline OUString overload of convert() and canContinue().
// NOTE(review): a destructor is declared but no copy constructor or copy
// assignment is visible in this view -- confirm whether copying is safe.
68 class Unicode2TextConverter
70 public:
// Constructs the converter for the given target encoding.
// NOTE(review): the constructor is not `explicit`.
71 Unicode2TextConverter( rtl_TextEncoding encoding );
72 ~Unicode2TextConverter();
// Convenience overload: forwards the string's buffer and length to the
// pointer/length overload below.
74 inline ::com::sun::star::uno::Sequence<sal_Int8> convert( const ::rtl::OUString &s )
76 return convert( s.getStr() , s.getLength() );
// Converts nLength sal_Unicode units starting at the given pointer.
78 ::com::sun::star::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength );
// Returns the current value of m_bCanContinue (not const-qualified).
79 sal_Bool canContinue() { return m_bCanContinue; }
81 private:
// Private initialization helper taking the encoding as rtl_TextEncoding.
82 void init( rtl_TextEncoding encoding );
// Converter and context handles.
// NOTE(review): presumably created in init() and released in the destructor -- confirm.
84 rtl_UnicodeToTextConverter m_convUnicode2Text;
85 rtl_UnicodeToTextContext m_contextUnicode2Text;
// Value reported by canContinue().
86 sal_Bool m_bCanContinue;
87 sal_Bool m_bInitialized;
88 rtl_TextEncoding m_rtlEncoding;
// NOTE(review): presumably buffers unconverted input between convert() calls -- confirm.
89 ::com::sun::star::uno::Sequence<sal_Unicode> m_seqSource;
94 /*----------------------------------------
96 * XMLFile2UTFConverter
98 **-----------------------------------------*/
// Declaration of a reader that pulls bytes from an XInputStream and converts
// them (see readAndConvert). It holds one Text2UnicodeConverter and one
// Unicode2TextConverter by raw pointer.
// NOTE(review): a destructor is declared and the class stores raw pointers, but
// no copy constructor or copy assignment is visible in this view -- confirm
// ownership and whether copying must be prevented.
99 class XMLFile2UTFConverter
101 public:
// Default constructor: marks the converter as not started and sets both
// converter pointers to null. m_in and m_sEncoding are default-constructed.
102 XMLFile2UTFConverter( ):
103 m_bStarted( sal_False ),
104 m_pText2Unicode( 0 ),
105 m_pUnicode2Text( 0 )
108 ~XMLFile2UTFConverter();
// Stores the input stream reference in m_in.
// NOTE(review): the parameter is a non-const reference although it is only copied.
110 void setInputStream( ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > &r ) { m_in = r; }
// Stores the encoding name in m_sEncoding.
111 void setEncoding( const ::rtl::OString &s ) { m_sEncoding = s; }
115 // @param nMaxToRead The number of chars that should be read. Note that this is not an exact number:
116 // fewer or more bytes than requested may be returned.
// @param seq Sequence passed by non-const reference.
//            NOTE(review): presumably receives the converted bytes -- confirm.
// @return sal_Int32. NOTE(review): presumably the number of bytes placed in seq -- confirm.
// The exception specification lists IOException, NotConnectedException,
// BufferSizeExceededException and RuntimeException.
117 sal_Int32 readAndConvert( ::com::sun::star::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead )
118 throw ( ::com::sun::star::io::IOException,
119 ::com::sun::star::io::NotConnectedException ,
120 ::com::sun::star::io::BufferSizeExceededException ,
121 ::com::sun::star::uno::RuntimeException );
123 private:
125 // Called only on the first sequence of bytes. Tries to figure out file format and encoding information.
126 // @return TRUE, when encoding information could be retrieved
127 // @return FALSE, when no encoding information was found in file
128 sal_Bool scanForEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq );
130 // Called only on the first sequence of bytes. Tries to figure out
131 // if enough data is available to scan encoding
132 // @return TRUE, when encoding is retrievable
133 // @return FALSE, when more data is needed
134 sal_Bool isEncodingRecognizable( const ::com::sun::star::uno::Sequence< sal_Int8 > & seq );
136 // When the encoding attribute is within the text (in the first line), it is removed.
137 void removeEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq );
139 // Initializes decoding depending on m_sEncoding setting
140 void initializeDecoding();
141 private:
// Input stream set through setInputStream().
142 ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > m_in;
// Initialized to sal_False by the constructor.
144 sal_Bool m_bStarted;
// Encoding name set through setEncoding().
145 ::rtl::OString m_sEncoding;
// Converter pointers, null after construction.
// NOTE(review): presumably allocated in initializeDecoding() and deleted in the
// destructor -- confirm.
147 Text2UnicodeConverter *m_pText2Unicode;
148 Unicode2TextConverter *m_pUnicode2Text;