1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: salcvt.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_vcl.hxx"
36 SalConverterCache::SalConverterCache()
41 SalConverterCache::GetInstance ()
43 static SalConverterCache
* pCvt
= NULL
;
45 pCvt
= new SalConverterCache
;
50 SalConverterCache::~SalConverterCache()
58 rtl_UnicodeToTextConverter
59 SalConverterCache::GetU2TConverter( rtl_TextEncoding nEncoding
)
61 if( rtl_isOctetTextEncoding( nEncoding
) )
63 ConverterT
& rConverter( m_aConverters
[ nEncoding
] );
64 if ( rConverter
.mpU2T
== NULL
)
67 rtl_createUnicodeToTextConverter( nEncoding
);
69 if ( rConverter
.mpU2T
== NULL
)
70 fprintf( stderr
, "failed to create Unicode -> %i converter\n", nEncoding
);
73 return rConverter
.mpU2T
;
78 rtl_TextToUnicodeConverter
79 SalConverterCache::GetT2UConverter( rtl_TextEncoding nEncoding
)
81 if( rtl_isOctetTextEncoding( nEncoding
) )
83 ConverterT
& rConverter( m_aConverters
[ nEncoding
] );
84 if ( rConverter
.mpT2U
== NULL
)
87 rtl_createTextToUnicodeConverter( nEncoding
);
89 if ( rConverter
.mpT2U
== NULL
)
90 fprintf( stderr
, "failed to create %i -> Unicode converter\n", nEncoding
);
93 return rConverter
.mpT2U
;
99 SalConverterCache::IsSingleByteEncoding( rtl_TextEncoding nEncoding
)
101 if( rtl_isOctetTextEncoding( nEncoding
) )
103 ConverterT
& rConverter( m_aConverters
[ nEncoding
] );
104 if ( ! rConverter
.mbValid
)
106 rConverter
.mbValid
= True
;
108 rtl_TextEncodingInfo aTextEncInfo
;
109 aTextEncInfo
.StructSize
= sizeof( aTextEncInfo
);
110 rtl_getTextEncodingInfo( nEncoding
, &aTextEncInfo
);
112 if ( aTextEncInfo
.MinimumCharSize
== aTextEncInfo
.MaximumCharSize
113 && aTextEncInfo
.MinimumCharSize
== 1)
114 rConverter
.mbSingleByteEncoding
= True
;
116 rConverter
.mbSingleByteEncoding
= False
;
119 return rConverter
.mbSingleByteEncoding
;
124 // check whether the character set nEncoding contains the unicode
125 // code point nChar. This list has been compiled from the according
126 // ttmap files in /usr/openwin/lib/X11/fonts/TrueType/ttmap/
128 SalConverterCache::EncodingHasChar( rtl_TextEncoding nEncoding
,
135 case RTL_TEXTENCODING_DONTKNOW
:
139 case RTL_TEXTENCODING_MS_1252
:
140 case RTL_TEXTENCODING_ISO_8859_1
:
141 case RTL_TEXTENCODING_ISO_8859_15
:
142 // handle iso8859-15 and iso8859-1 the same (and both with euro)
143 // handle them also like ms1252
144 // this is due to the fact that so many X fonts say they are iso8859-1
145 // but have the other glyphs anyway because they are really ms1252
146 bMatch
= ( /*nChar >= 0x0000 &&*/ nChar
<= 0x00ff )
147 || ( nChar
== 0x20ac )
148 || ( nChar
== 0x201a )
149 || ( nChar
== 0x0192 )
150 || ( nChar
== 0x201e )
151 || ( nChar
== 0x2026 )
152 || ( nChar
== 0x2020 )
153 || ( nChar
== 0x2021 )
154 || ( nChar
== 0x02c6 )
155 || ( nChar
== 0x2030 )
156 || ( nChar
== 0x0160 )
157 || ( nChar
== 0x2039 )
158 || ( nChar
== 0x0152 )
159 || ( nChar
== 0x017d )
160 || ( nChar
== 0x2018 )
161 || ( nChar
== 0x2019 )
162 || ( nChar
== 0x201c )
163 || ( nChar
== 0x201d )
164 || ( nChar
== 0x2022 )
165 || ( nChar
== 0x2013 )
166 || ( nChar
== 0x2014 )
167 || ( nChar
== 0x02dc )
168 || ( nChar
== 0x2122 )
169 || ( nChar
== 0x0161 )
170 || ( nChar
== 0x203a )
171 || ( nChar
== 0x0153 )
172 || ( nChar
== 0x017e )
173 || ( nChar
== 0x0178 )
177 case RTL_TEXTENCODING_ISO_8859_2
:
178 bMatch
= ( nChar
>= 0x0020 && nChar
<= 0x007e )
179 || ( nChar
>= 0x00a0 && nChar
<= 0x017e )
180 || ( nChar
>= 0x02c7 && nChar
<= 0x02dd );
183 case RTL_TEXTENCODING_ISO_8859_4
:
184 bMatch
= ( nChar
>= 0x0020 && nChar
<= 0x007e )
185 || ( nChar
>= 0x00a0 && nChar
<= 0x017e )
186 || ( nChar
>= 0x02c7 && nChar
<= 0x02db );
189 case RTL_TEXTENCODING_ISO_8859_5
:
190 bMatch
= ( nChar
>= 0x0020 && nChar
<= 0x007e )
191 || ( nChar
>= 0x00a0 && nChar
<= 0x00ad )
192 || ( nChar
>= 0x0401 && nChar
<= 0x045f )
193 || ( nChar
== 0x2116 );
196 case RTL_TEXTENCODING_ISO_8859_6
:
197 bMatch
= ( nChar
>= 0x0020 && nChar
<= 0x007e )
198 || ( nChar
>= 0x0600 && nChar
<= 0x06ff )
199 || ( nChar
>= 0xfb50 && nChar
<= 0xfffe );
202 case RTL_TEXTENCODING_ISO_8859_7
:
203 bMatch
= ( nChar
>= 0x0020 && nChar
<= 0x007e )
204 || ( nChar
>= 0x00a0 && nChar
<= 0x00bd )
205 || ( nChar
== 0x02bd )
206 || ( nChar
>= 0x0384 && nChar
<= 0x03ce )
207 || ( nChar
>= 0x2014 && nChar
<= 0x2019 );
210 case RTL_TEXTENCODING_ISO_8859_8
:
211 bMatch
= ( nChar
>= 0x0020 && nChar
<= 0x007e )
212 || ( nChar
>= 0x00a0 && nChar
<= 0x00f7 )
213 || ( nChar
>= 0x05d0 && nChar
<= 0x05ea )
214 || ( nChar
== 0x2017 );
217 case RTL_TEXTENCODING_ISO_8859_9
:
218 bMatch
= ( nChar
>= 0x0020 && nChar
<= 0x007e )
219 || ( nChar
>= 0x00a0 && nChar
<= 0x015f );
222 case RTL_TEXTENCODING_ISO_8859_13
:
223 bMatch
= ( nChar
>= 0x0020 && nChar
<= 0x007e )
224 || ( nChar
>= 0x00a0 && nChar
<= 0x017e )
225 || ( nChar
>= 0x2019 && nChar
<= 0x201e );
228 /* real case for RTL_TEXTENCODING_ISO_8859_15
229 case RTL_TEXTENCODING_ISO_8859_15:
230 bMatch = ( nChar >= 0x0020 && nChar <= 0x007e )
231 || ( nChar >= 0x00a0 && nChar <= 0x00ff )
232 || ( nChar >= 0x0152 && nChar <= 0x017e )
233 || ( nChar == 0x20ac );
237 case RTL_TEXTENCODING_JIS_X_0201
:
238 bMatch
= ( nChar
>= 0x0020 && nChar
<= 0x007e )
239 || ( nChar
>= 0xff61 && nChar
<= 0xff9f );
242 case RTL_TEXTENCODING_MS_1251
:
243 bMatch
= ( nChar
>= 0x0020 && nChar
<= 0x007e )
244 || ( nChar
>= 0x00a0 && nChar
<= 0x00bb )
245 || ( nChar
>= 0x0401 && nChar
<= 0x045f )
246 || ( nChar
>= 0x0490 && nChar
<= 0x0491 )
247 || ( nChar
>= 0x2013 && nChar
<= 0x203a )
248 || ( nChar
>= 0x2116 && nChar
<= 0x2122 );
251 case RTL_TEXTENCODING_KOI8_R
:
252 bMatch
= ( nChar
>= 0x0020 && nChar
<= 0x007e )
253 || ( nChar
>= 0x00a0 && nChar
<= 0x00b7 )
254 || ( nChar
== 0x00f7 )
255 || ( nChar
>= 0x0401 && nChar
<= 0x0451 )
256 || ( nChar
>= 0x2219 && nChar
<= 0x221a )
257 || ( nChar
>= 0x2248 && nChar
<= 0x2265 )
258 || ( nChar
>= 0x2320 && nChar
<= 0x2321 )
259 || ( nChar
>= 0x2500 && nChar
<= 0x25a0 );
262 case RTL_TEXTENCODING_UNICODE
:
266 case RTL_TEXTENCODING_EUC_KR
:
267 case RTL_TEXTENCODING_BIG5
:
268 case RTL_TEXTENCODING_GBK
:
269 case RTL_TEXTENCODING_GB_2312
:
270 case RTL_TEXTENCODING_MS_1361
:
271 case RTL_TEXTENCODING_JIS_X_0208
:
273 // XXX Big5 and Korean EUC contain Ascii chars, but Solaris
274 // *-big5-1 and *-ksc5601.1992-3 fonts dont, in general CJK fonts
275 // are monospaced, so dont trust them for latin chars
283 // XXX really convert the unicode char into the encoding
284 // and check for conversion errors, this is expensive !
285 rtl_UnicodeToTextConverter aConverter
;
286 rtl_UnicodeToTextContext aContext
;
288 aConverter
= GetU2TConverter(nEncoding
);
289 aContext
= rtl_createUnicodeToTextContext( aConverter
);
292 if ( aConverter
== NULL
)
296 sal_Char pConversionBuffer
[ 32 ];
297 sal_uInt32 nConversionInfo
;
298 sal_Size nConvertedChars
;
301 nSize
= rtl_convertUnicodeToText( aConverter
, aContext
,
302 &nChar
, 1, pConversionBuffer
, sizeof(pConversionBuffer
),
303 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
304 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
,
305 &nConversionInfo
, &nConvertedChars
);
307 rtl_destroyUnicodeToTextContext( aConverter
, aContext
);
309 bMatch
= (nConvertedChars
== 1)
310 && (nSize
== 1 || nSize
== 2) // XXX Fix me this is a hack
311 && ((nConversionInfo
& RTL_UNICODETOTEXT_INFO_ERROR
) == 0);
318 // wrapper for rtl_convertUnicodeToText that handles the usual cases for
319 // textconversion in drawtext and gettextwidth routines
321 SalConverterCache::ConvertStringUTF16( const sal_Unicode
*pText
, int nTextLen
,
322 sal_Char
*pBuffer
, sal_Size nBufferSize
, rtl_TextEncoding nEncoding
)
324 rtl_UnicodeToTextConverter aConverter
= GetU2TConverter(nEncoding
);
326 const sal_uInt32 nCvtFlags
=
327 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
328 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK
329 | RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK
;
333 rtl_UnicodeToTextContext aContext
=
334 rtl_createUnicodeToTextContext( aConverter
);
336 sal_Size nSize
= rtl_convertUnicodeToText( aConverter
, aContext
,
337 pText
, nTextLen
, pBuffer
, nBufferSize
,
338 nCvtFlags
, &nCvtInfo
, &nCvtChars
);
340 rtl_destroyUnicodeToTextContext( aConverter
, aContext
);