1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
31 #include <osl/nlsupport.h>
32 #include <osl/diagnose.h>
33 #include <osl/process.h>
34 #include <rtl/memory.h>
38 /*****************************************************************************
40 *****************************************************************************/
45 const rtl_TextEncoding value
;
49 /*****************************************************************************
50 compare function for binary search
51 *****************************************************************************/
54 _pair_compare (const char *key
, const _pair
*pair
)
56 int result
= rtl_str_compareIgnoreAsciiCase( key
, pair
->key
);
60 /*****************************************************************************
61 binary search on encoding tables
62 *****************************************************************************/
65 _pair_search (const char *key
, const _pair
*base
, unsigned int member
)
67 unsigned int lower
= 0;
68 unsigned int upper
= member
;
72 /* check for validity of input */
73 if ( (key
== NULL
) || (base
== NULL
) || (member
== 0) )
77 while ( lower
< upper
)
79 current
= (lower
+ upper
) / 2;
80 comparison
= _pair_compare( key
, base
+ current
);
83 else if (comparison
> 0)
86 return base
+ current
;
93 /*****************************************************************************
94 convert rtl_Locale to locale string
95 *****************************************************************************/
97 static char * _compose_locale( rtl_Locale
* pLocale
, char * buffer
, size_t n
)
99 /* check if a valid locale is specified */
100 if( pLocale
&& pLocale
->Language
&& (pLocale
->Language
->length
== 2) )
104 /* convert language code to ascii */
106 rtl_String
*pLanguage
= NULL
;
108 rtl_uString2String( &pLanguage
,
109 pLocale
->Language
->buffer
, pLocale
->Language
->length
,
110 RTL_TEXTENCODING_ASCII_US
, OUSTRING_TO_OSTRING_CVTFLAGS
);
112 if( pLanguage
->length
< n
)
114 strcpy( buffer
, pLanguage
->buffer
);
115 offset
= pLanguage
->length
;
118 rtl_string_release( pLanguage
);
121 /* convert country code to ascii */
122 if( pLocale
->Country
&& (pLocale
->Country
->length
== 2) )
124 rtl_String
*pCountry
= NULL
;
126 rtl_uString2String( &pCountry
,
127 pLocale
->Country
->buffer
, pLocale
->Country
->length
,
128 RTL_TEXTENCODING_ASCII_US
, OUSTRING_TO_OSTRING_CVTFLAGS
);
130 if( offset
+ pCountry
->length
+ 1 < n
)
132 strcpy( buffer
+ offset
++, "_" );
133 strcpy( buffer
+ offset
, pCountry
->buffer
);
134 offset
+= pCountry
->length
;
137 rtl_string_release( pCountry
);
140 /* convert variant to ascii - check if there is enough space for the variant string */
141 if( pLocale
->Variant
&& pLocale
->Variant
->length
&&
142 ( pLocale
->Variant
->length
< n
- 6 ) )
144 rtl_String
*pVariant
= NULL
;
146 rtl_uString2String( &pVariant
,
147 pLocale
->Variant
->buffer
, pLocale
->Variant
->length
,
148 RTL_TEXTENCODING_ASCII_US
, OUSTRING_TO_OSTRING_CVTFLAGS
);
150 if( offset
+ pVariant
->length
+ 1 < n
)
152 strcpy( buffer
+ offset
, pVariant
->buffer
);
153 offset
+= pVariant
->length
;
156 rtl_string_release( pVariant
);
165 /*****************************************************************************
166 convert locale string to rtl_Locale
167 *****************************************************************************/
169 static rtl_Locale
* _parse_locale( const char * locale
)
171 static sal_Unicode c_locale
[2] = { (sal_Unicode
) 'C', 0 };
173 /* check if locale contains a valid string */
176 size_t len
= strlen( locale
);
180 rtl_uString
* pLanguage
= NULL
;
181 rtl_uString
* pCountry
= NULL
;
182 rtl_uString
* pVariant
= NULL
;
186 /* convert language code to unicode */
187 rtl_string2UString( &pLanguage
, locale
, 2, RTL_TEXTENCODING_ASCII_US
, OSTRING_TO_OUSTRING_CVTFLAGS
);
188 OSL_ASSERT(pLanguage
!= NULL
);
190 /* convert country code to unicode */
191 if( len
>= 5 && '_' == locale
[2] )
193 rtl_string2UString( &pCountry
, locale
+ 3, 2, RTL_TEXTENCODING_ASCII_US
, OSTRING_TO_OUSTRING_CVTFLAGS
);
194 OSL_ASSERT(pCountry
!= NULL
);
198 /* convert variant code to unicode - do not rely on "." as delimiter */
200 rtl_string2UString( &pVariant
, locale
+ offset
, len
- offset
, RTL_TEXTENCODING_ASCII_US
, OSTRING_TO_OUSTRING_CVTFLAGS
);
201 OSL_ASSERT(pVariant
!= NULL
);
204 rtl_Locale
* ret
= rtl_locale_register( pLanguage
->buffer
, pCountry
? pCountry
->buffer
: c_locale
+ 1, pVariant
? pVariant
->buffer
: c_locale
+ 1 );
206 if (pVariant
) rtl_uString_release(pVariant
);
207 if (pCountry
) rtl_uString_release(pCountry
);
208 if (pLanguage
) rtl_uString_release(pLanguage
);
213 return rtl_locale_register( c_locale
, c_locale
+ 1, c_locale
+ 1 );
/*
 * This implementation of osl_getTextEncodingFromLocale maps
 * from the ISO language codes.
 */
224 const _pair _full_locale_list
[] = {
225 { "ja_JP.eucJP", RTL_TEXTENCODING_EUC_JP
},
226 { "ja_JP.EUC", RTL_TEXTENCODING_EUC_JP
},
227 { "ko_KR.EUC", RTL_TEXTENCODING_EUC_KR
},
228 { "zh_CN.EUC", RTL_TEXTENCODING_EUC_CN
},
229 { "zh_TW.EUC", RTL_TEXTENCODING_EUC_TW
}
232 const _pair _locale_extension_list
[] = {
233 { "big5", RTL_TEXTENCODING_BIG5
},
234 { "big5hk", RTL_TEXTENCODING_BIG5_HKSCS
},
235 { "gb18030", RTL_TEXTENCODING_GB_18030
},
236 { "euc", RTL_TEXTENCODING_EUC_JP
},
237 { "iso8859-1", RTL_TEXTENCODING_ISO_8859_1
},
238 { "iso8859-10", RTL_TEXTENCODING_ISO_8859_10
},
239 { "iso8859-13", RTL_TEXTENCODING_ISO_8859_13
},
240 { "iso8859-14", RTL_TEXTENCODING_ISO_8859_14
},
241 { "iso8859-15", RTL_TEXTENCODING_ISO_8859_15
},
242 { "iso8859-2", RTL_TEXTENCODING_ISO_8859_2
},
243 { "iso8859-3", RTL_TEXTENCODING_ISO_8859_3
},
244 { "iso8859-4", RTL_TEXTENCODING_ISO_8859_4
},
245 { "iso8859-5", RTL_TEXTENCODING_ISO_8859_5
},
246 { "iso8859-6", RTL_TEXTENCODING_ISO_8859_6
},
247 { "iso8859-7", RTL_TEXTENCODING_ISO_8859_7
},
248 { "iso8859-8", RTL_TEXTENCODING_ISO_8859_8
},
249 { "iso8859-9", RTL_TEXTENCODING_ISO_8859_9
},
250 { "koi8-r", RTL_TEXTENCODING_KOI8_R
},
251 { "koi8-u", RTL_TEXTENCODING_KOI8_U
},
252 { "pck", RTL_TEXTENCODING_MS_932
},
254 { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW
},
256 { "utf-16", RTL_TEXTENCODING_UNICODE
},
257 { "utf-7", RTL_TEXTENCODING_UTF7
},
258 { "utf-8", RTL_TEXTENCODING_UTF8
}
261 const _pair _iso_language_list
[] = {
262 { "af", RTL_TEXTENCODING_ISO_8859_1
},
263 { "ar", RTL_TEXTENCODING_ISO_8859_6
},
264 { "az", RTL_TEXTENCODING_ISO_8859_9
},
265 { "be", RTL_TEXTENCODING_ISO_8859_5
},
266 { "bg", RTL_TEXTENCODING_ISO_8859_5
},
267 { "ca", RTL_TEXTENCODING_ISO_8859_1
},
268 { "cs", RTL_TEXTENCODING_ISO_8859_2
},
269 { "da", RTL_TEXTENCODING_ISO_8859_1
},
270 { "de", RTL_TEXTENCODING_ISO_8859_1
},
271 { "el", RTL_TEXTENCODING_ISO_8859_7
},
272 { "en", RTL_TEXTENCODING_ISO_8859_1
},
273 { "es", RTL_TEXTENCODING_ISO_8859_1
},
274 { "et", RTL_TEXTENCODING_ISO_8859_4
},
275 { "eu", RTL_TEXTENCODING_ISO_8859_1
},
276 { "fa", RTL_TEXTENCODING_ISO_8859_6
},
277 { "fi", RTL_TEXTENCODING_ISO_8859_1
},
278 { "fo", RTL_TEXTENCODING_ISO_8859_1
},
279 { "fr", RTL_TEXTENCODING_ISO_8859_1
},
280 { "gr", RTL_TEXTENCODING_ISO_8859_7
},
281 { "he", RTL_TEXTENCODING_ISO_8859_8
},
282 { "hi", RTL_TEXTENCODING_DONTKNOW
},
283 { "hr", RTL_TEXTENCODING_ISO_8859_2
},
284 { "hu", RTL_TEXTENCODING_ISO_8859_2
},
285 { "hy", RTL_TEXTENCODING_DONTKNOW
},
286 { "id", RTL_TEXTENCODING_ISO_8859_1
},
287 { "is", RTL_TEXTENCODING_ISO_8859_1
},
288 { "it", RTL_TEXTENCODING_ISO_8859_1
},
289 { "iw", RTL_TEXTENCODING_ISO_8859_8
},
290 { "ja", RTL_TEXTENCODING_EUC_JP
},
291 { "ka", RTL_TEXTENCODING_DONTKNOW
},
292 { "kk", RTL_TEXTENCODING_ISO_8859_5
},
293 { "ko", RTL_TEXTENCODING_EUC_KR
},
294 { "lt", RTL_TEXTENCODING_ISO_8859_4
},
295 { "lv", RTL_TEXTENCODING_ISO_8859_4
},
296 { "mk", RTL_TEXTENCODING_ISO_8859_5
},
297 { "mr", RTL_TEXTENCODING_DONTKNOW
},
298 { "ms", RTL_TEXTENCODING_ISO_8859_1
},
299 { "nl", RTL_TEXTENCODING_ISO_8859_1
},
300 { "no", RTL_TEXTENCODING_ISO_8859_1
},
301 { "pl", RTL_TEXTENCODING_ISO_8859_2
},
302 { "pt", RTL_TEXTENCODING_ISO_8859_1
},
303 { "ro", RTL_TEXTENCODING_ISO_8859_2
},
304 { "ru", RTL_TEXTENCODING_ISO_8859_5
},
305 { "sa", RTL_TEXTENCODING_DONTKNOW
},
306 { "sk", RTL_TEXTENCODING_ISO_8859_2
},
307 { "sl", RTL_TEXTENCODING_ISO_8859_2
},
308 { "sq", RTL_TEXTENCODING_ISO_8859_2
},
309 { "sv", RTL_TEXTENCODING_ISO_8859_1
},
310 { "sw", RTL_TEXTENCODING_ISO_8859_1
},
311 { "ta", RTL_TEXTENCODING_DONTKNOW
},
312 { "th", RTL_TEXTENCODING_DONTKNOW
},
313 { "tr", RTL_TEXTENCODING_ISO_8859_9
},
314 { "tt", RTL_TEXTENCODING_ISO_8859_5
},
315 { "uk", RTL_TEXTENCODING_ISO_8859_5
},
316 { "ur", RTL_TEXTENCODING_ISO_8859_6
},
317 { "uz", RTL_TEXTENCODING_ISO_8859_9
},
318 { "vi", RTL_TEXTENCODING_DONTKNOW
},
319 { "zh", RTL_TEXTENCODING_BIG5
}
322 /*****************************************************************************
323 return the text encoding corresponding to the given locale
324 *****************************************************************************/
326 rtl_TextEncoding
osl_getTextEncodingFromLocale( rtl_Locale
* pLocale
)
328 const _pair
*language
= 0;
329 char locale_buf
[64] = "";
332 WinMessageBox(HWND_DESKTOP
,HWND_DESKTOP
,
333 "Please contact technical support and report above informations.\n\n",
334 "Critical error: osl_getTextEncodingFromLocale",
335 0, MB_ERROR
| MB_OK
| MB_MOVEABLE
);
337 /* default to process locale if pLocale == NULL */
338 if( NULL
== pLocale
)
339 osl_getProcessLocale( &pLocale
);
341 /* convert rtl_Locale to locale string */
342 if( _compose_locale( pLocale
, locale_buf
, 64 ) )
344 /* check special handling list (EUC) first */
345 const unsigned int members
= sizeof( _full_locale_list
) / sizeof( _pair
);
346 language
= _pair_search( locale_buf
, _full_locale_list
, members
);
348 if( NULL
== language
)
351 * check if there is a charset qualifier at the end of the given locale string
352 * e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what
355 cp
= strrchr( locale_buf
, '.' );
359 const unsigned int members
= sizeof( _locale_extension_list
) / sizeof( _pair
);
360 language
= _pair_search( cp
+ 1, _locale_extension_list
, members
);
364 /* use iso language code to determine the charset */
365 if( NULL
== language
)
367 const unsigned int members
= sizeof( _iso_language_list
) / sizeof( _pair
);
369 /* iso lang codes have 2 charaters */
370 locale_buf
[2] = '\0';
372 language
= _pair_search( locale_buf
, _iso_language_list
, members
);
376 /* a matching item in our list provides a mapping from codeset to
378 if ( language
!= NULL
)
379 return language
->value
;
381 return RTL_TEXTENCODING_DONTKNOW
;
384 /*****************************************************************************
385 return the current process locale
386 *****************************************************************************/
388 void _imp_getProcessLocale( rtl_Locale
** ppLocale
)
390 /* simulate behavior off setlocale */
391 char * locale
= getenv( "LC_ALL" );
394 locale
= getenv( "LC_CTYPE" );
397 locale
= getenv( "LANG" );
402 *ppLocale
= _parse_locale( locale
);
405 /*****************************************************************************
406 set the current process locale
407 *****************************************************************************/
409 int _imp_setProcessLocale( rtl_Locale
* pLocale
)
413 /* convert rtl_Locale to locale string */
414 if( NULL
!= _compose_locale( pLocale
, locale_buf
, 64 ) )
416 /* only change env vars that exist already */
417 if( getenv( "LC_ALL" ) ) {
418 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
419 setenv( "LC_ALL", locale_buf
, 1);
421 setenv( "LC_ALL", locale_buf
);
425 if( getenv( "LC_CTYPE" ) ) {
426 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
427 setenv("LC_CTYPE", locale_buf
, 1 );
429 setenv( "LC_CTYPE", locale_buf
);
433 if( getenv( "LANG" ) ) {
434 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
435 setenv("LC_CTYPE", locale_buf
, 1 );
437 setenv( "LANG", locale_buf
);