merged tag ooo/OOO330_m14
[LibreOffice.git] / sax / inc / xml2utf.hxx
blob1a0640f5a68779115f423b54300451c5b8477b2a
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 // TODO: Where do these come from? (The origin of these macros is undocumented.)
// Function-like macros yielding the larger (Max) or smaller (Min) of two operands.
// Each argument appears twice in the expansion, so an argument with side effects
// (e.g. i++) may be evaluated twice.
29 #define Max( a, b ) (((a)>(b)) ? (a) : (b) )
30 #define Min( a, b ) (((a)<(b)) ? (a) : (b) )
34 * Text2UnicodeConverter
36 **/
37 namespace sax_expatwrap {
// Converts byte sequences (Sequence<sal_Int8>) into sequences of sal_Unicode.
// The encoding is supplied to the constructor by name as an OString.
// Only declarations are visible here; the conversion logic lives in the implementation file.
39 class Text2UnicodeConverter
42 public:
// @param sEncoding  name of the source text encoding
43 Text2UnicodeConverter( const ::rtl::OString & sEncoding );
44 ~Text2UnicodeConverter();
// Converts the given bytes and returns the result as a sequence of sal_Unicode.
46 ::com::sun::star::uno::Sequence < sal_Unicode > convert( const ::com::sun::star::uno::Sequence<sal_Int8> & );
// Returns the current value of m_bCanContinue.
// NOTE(review): presumably FALSE once the converter could not be set up or hit an
// unrecoverable error - confirm in the implementation.
47 sal_Bool canContinue() { return m_bCanContinue; }
49 private:
// Set-up helper taking the resolved rtl_TextEncoding.
// NOTE(review): presumably creates m_convText2Unicode / m_contextText2Unicode - confirm.
50 void init( rtl_TextEncoding encoding );
// rtl text-to-unicode converter handle and its associated context handle.
52 rtl_TextToUnicodeConverter m_convText2Unicode;
53 rtl_TextToUnicodeContext m_contextText2Unicode;
// Value reported by canContinue().
54 sal_Bool m_bCanContinue;
55 sal_Bool m_bInitialized;
56 rtl_TextEncoding m_rtlEncoding;
// NOTE(review): presumably holds source bytes left unconverted by a previous convert()
// call (e.g. a split multi-byte character) - confirm in the implementation.
57 ::com::sun::star::uno::Sequence<sal_Int8> m_seqSource;
60 /*----------------------------------------
62 * Unicode2TextConverter
64 **-----------------------------------------*/
// Converts UTF-16 text (sal_Unicode) into byte sequences (Sequence<sal_Int8>).
// The target encoding is supplied to the constructor as an rtl_TextEncoding.
// Only declarations are visible here; the conversion logic lives in the implementation file.
65 class Unicode2TextConverter
67 public:
// @param encoding  target text encoding
68 Unicode2TextConverter( rtl_TextEncoding encoding );
69 ~Unicode2TextConverter();
// Convenience overload: forwards the string's buffer and length to the
// pointer/length overload of convert().
71 inline ::com::sun::star::uno::Sequence<sal_Int8> convert( const ::rtl::OUString &s )
73 return convert( s.getStr() , s.getLength() );
// Converts nLength sal_Unicode units starting at the given pointer and returns the
// resulting bytes.
75 ::com::sun::star::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength );
// Returns the current value of m_bCanContinue.
// NOTE(review): presumably FALSE once the converter could not be set up or hit an
// unrecoverable error - confirm in the implementation.
76 sal_Bool canContinue() { return m_bCanContinue; }
78 private:
// Set-up helper taking the target rtl_TextEncoding.
// NOTE(review): presumably creates m_convUnicode2Text / m_contextUnicode2Text - confirm.
79 void init( rtl_TextEncoding encoding );
// rtl unicode-to-text converter handle and its associated context handle.
81 rtl_UnicodeToTextConverter m_convUnicode2Text;
82 rtl_UnicodeToTextContext m_contextUnicode2Text;
// Value reported by canContinue().
83 sal_Bool m_bCanContinue;
84 sal_Bool m_bInitialized;
85 rtl_TextEncoding m_rtlEncoding;
// NOTE(review): presumably holds source characters left unconverted by a previous
// convert() call - confirm in the implementation.
86 ::com::sun::star::uno::Sequence<sal_Unicode> m_seqSource;
91 /*----------------------------------------
93 * XMLFile2UTFConverter
95 **-----------------------------------------*/
// Reads bytes from an XInputStream and hands them out via readAndConvert().
// Holds the input stream, an encoding name and two converter pointers.
// NOTE(review): judging by the class name and the two converters, the data is presumably
// recoded to UTF-8 via Unicode - confirm in the implementation.
96 class XMLFile2UTFConverter
98 public:
// Starts in the "not started" state with both converter pointers null.
99 XMLFile2UTFConverter( ):
100 m_bStarted( sal_False ),
101 m_pText2Unicode( 0 ),
102 m_pUnicode2Text( 0 )
// NOTE(review): presumably releases m_pText2Unicode / m_pUnicode2Text - confirm ownership
// in the implementation.
105 ~XMLFile2UTFConverter();
// Stores the stream that readAndConvert() is expected to read from.
107 void setInputStream( ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > &r ) { m_in = r; }
// Stores the encoding name in m_sEncoding (see initializeDecoding()).
108 void setEncoding( const ::rtl::OString &s ) { m_sEncoding = s; }
112 // @param nMaxToRead The number of characters that should be read. This is a hint, not an exact
113 // count: fewer or more bytes than requested may be returned.
114 sal_Int32 readAndConvert( ::com::sun::star::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead )
115 throw ( ::com::sun::star::io::IOException,
116 ::com::sun::star::io::NotConnectedException ,
117 ::com::sun::star::io::BufferSizeExceededException ,
118 ::com::sun::star::uno::RuntimeException );
120 private:
122 // Called only on the first sequence of bytes. Tries to figure out file format and encoding information.
123 // @return TRUE, when encoding information could be retrieved
124 // @return FALSE, when no encoding information was found in the file
125 sal_Bool scanForEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq );
127 // Called only on the first sequence of bytes. Tries to figure out
128 // whether enough data is available to scan for the encoding.
129 // @return TRUE, when the encoding is retrievable
130 // @return FALSE, when more data is needed
131 sal_Bool isEncodingRecognizable( const ::com::sun::star::uno::Sequence< sal_Int8 > & seq );
133 // When the encoding attribute is within the text (in the first line), it is removed.
134 void removeEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq );
136 // Initializes decoding depending on the m_sEncoding setting
137 void initializeDecoding();
138 private:
// Input stream set via setInputStream().
139 ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > m_in;
// Initialised to sal_False by the constructor.
// NOTE(review): presumably set once the first chunk has been processed - confirm.
141 sal_Bool m_bStarted;
// Encoding name set via setEncoding().
142 ::rtl::OString m_sEncoding;
// Converter objects; both null after construction.
144 Text2UnicodeConverter *m_pText2Unicode;
145 Unicode2TextConverter *m_pUnicode2Text;