1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
// Function-like maximum / minimum helpers.
//
// Max( a, b ) yields the larger and Min( a, b ) the smaller of the two
// operands; when the operands compare equal, the second operand is the result.
//
// Caution: these are macros, so the selected operand is evaluated twice.
// Never pass an expression with side effects (e.g. Max( i++, j )).
#define Max( a, b ) (((a)>(b)) ? (a) : (b) )
#define Min( a, b ) (((a)<(b)) ? (a) : (b) )
/*----------------------------------------
*
* Text2UnicodeConverter
*
**-----------------------------------------*/
37 namespace sax_expatwrap
{
39 class Text2UnicodeConverter
43 Text2UnicodeConverter( const ::rtl::OString
& sEncoding
);
44 ~Text2UnicodeConverter();
46 ::com::sun::star::uno::Sequence
< sal_Unicode
> convert( const ::com::sun::star::uno::Sequence
<sal_Int8
> & );
47 sal_Bool
canContinue() { return m_bCanContinue
; }
50 void init( rtl_TextEncoding encoding
);
52 rtl_TextToUnicodeConverter m_convText2Unicode
;
53 rtl_TextToUnicodeContext m_contextText2Unicode
;
54 sal_Bool m_bCanContinue
;
55 sal_Bool m_bInitialized
;
56 rtl_TextEncoding m_rtlEncoding
;
57 ::com::sun::star::uno::Sequence
<sal_Int8
> m_seqSource
;
/*----------------------------------------
*
* Unicode2TextConverter
*
**-----------------------------------------*/
65 class Unicode2TextConverter
68 Unicode2TextConverter( rtl_TextEncoding encoding
);
69 ~Unicode2TextConverter();
71 inline ::com::sun::star::uno::Sequence
<sal_Int8
> convert( const ::rtl::OUString
&s
)
73 return convert( s
.getStr() , s
.getLength() );
75 ::com::sun::star::uno::Sequence
<sal_Int8
> convert( const sal_Unicode
* , sal_Int32 nLength
);
76 sal_Bool
canContinue() { return m_bCanContinue
; }
79 void init( rtl_TextEncoding encoding
);
81 rtl_UnicodeToTextConverter m_convUnicode2Text
;
82 rtl_UnicodeToTextContext m_contextUnicode2Text
;
83 sal_Bool m_bCanContinue
;
84 sal_Bool m_bInitialized
;
85 rtl_TextEncoding m_rtlEncoding
;
86 ::com::sun::star::uno::Sequence
<sal_Unicode
> m_seqSource
;
/*----------------------------------------
*
* XMLFile2UTFConverter
*
**-----------------------------------------*/
96 class XMLFile2UTFConverter
99 XMLFile2UTFConverter( ):
100 m_bStarted( sal_False
),
101 m_pText2Unicode( 0 ),
105 ~XMLFile2UTFConverter();
107 void setInputStream( ::com::sun::star::uno::Reference
< ::com::sun::star::io::XInputStream
> &r
) { m_in
= r
; }
108 void setEncoding( const ::rtl::OString
&s
) { m_sEncoding
= s
; }
112 // @param nMaxToRead The number of chars, that should be read. Note that this is no exact number. There
113 // may be returned less or more bytes than ordered.
114 sal_Int32
readAndConvert( ::com::sun::star::uno::Sequence
<sal_Int8
> &seq
, sal_Int32 nMaxToRead
)
115 throw ( ::com::sun::star::io::IOException
,
116 ::com::sun::star::io::NotConnectedException
,
117 ::com::sun::star::io::BufferSizeExceededException
,
118 ::com::sun::star::uno::RuntimeException
);
122 // Called only on first Sequence of bytes. Tries to figure out file format and encoding information.
123 // @return TRUE, when encoding information could be retrieved
124 // @return FALSE, when no encoding information was found in file
125 sal_Bool
scanForEncoding( ::com::sun::star::uno::Sequence
<sal_Int8
> &seq
);
127 // Called only on first Sequence of bytes. Tries to figure out
128 // if enough data is available to scan encoding
129 // @return TRUE, when encoding is retrievable
130 // @return FALSE, when more data is needed
131 sal_Bool
isEncodingRecognizable( const ::com::sun::star::uno::Sequence
< sal_Int8
> & seq
);
133 // When encoding attribute is within the text (in the first line), it is removed.
134 void removeEncoding( ::com::sun::star::uno::Sequence
<sal_Int8
> &seq
);
136 // Initializes decoding depending on m_sEncoding setting
137 void initializeDecoding();
139 ::com::sun::star::uno::Reference
< ::com::sun::star::io::XInputStream
> m_in
;
142 ::rtl::OString m_sEncoding
;
144 Text2UnicodeConverter
*m_pText2Unicode
;
145 Unicode2TextConverter
*m_pUnicode2Text
;