1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: nlsupport.c,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
34 #include <osl/nlsupport.h>
35 #include <osl/diagnose.h>
36 #include <osl/process.h>
37 #include <rtl/memory.h>
41 /*****************************************************************************
43 *****************************************************************************/
48 const rtl_TextEncoding value
;
52 /*****************************************************************************
53 compare function for binary search
54 *****************************************************************************/
57 _pair_compare (const char *key
, const _pair
*pair
)
59 int result
= rtl_str_compareIgnoreAsciiCase( key
, pair
->key
);
63 /*****************************************************************************
64 binary search on encoding tables
65 *****************************************************************************/
68 _pair_search (const char *key
, const _pair
*base
, unsigned int member
)
70 unsigned int lower
= 0;
71 unsigned int upper
= member
;
75 /* check for validity of input */
76 if ( (key
== NULL
) || (base
== NULL
) || (member
== 0) )
80 while ( lower
< upper
)
82 current
= (lower
+ upper
) / 2;
83 comparison
= _pair_compare( key
, base
+ current
);
86 else if (comparison
> 0)
89 return base
+ current
;
96 /*****************************************************************************
97 convert rtl_Locale to locale string
98 *****************************************************************************/
100 static char * _compose_locale( rtl_Locale
* pLocale
, char * buffer
, size_t n
)
102 /* check if a valid locale is specified */
103 if( pLocale
&& pLocale
->Language
&& (pLocale
->Language
->length
== 2) )
107 /* convert language code to ascii */
109 rtl_String
*pLanguage
= NULL
;
111 rtl_uString2String( &pLanguage
,
112 pLocale
->Language
->buffer
, pLocale
->Language
->length
,
113 RTL_TEXTENCODING_ASCII_US
, OUSTRING_TO_OSTRING_CVTFLAGS
);
115 if( pLanguage
->length
< n
)
117 strcpy( buffer
, pLanguage
->buffer
);
118 offset
= pLanguage
->length
;
121 rtl_string_release( pLanguage
);
124 /* convert country code to ascii */
125 if( pLocale
->Country
&& (pLocale
->Country
->length
== 2) )
127 rtl_String
*pCountry
= NULL
;
129 rtl_uString2String( &pCountry
,
130 pLocale
->Country
->buffer
, pLocale
->Country
->length
,
131 RTL_TEXTENCODING_ASCII_US
, OUSTRING_TO_OSTRING_CVTFLAGS
);
133 if( offset
+ pCountry
->length
+ 1 < n
)
135 strcpy( buffer
+ offset
++, "_" );
136 strcpy( buffer
+ offset
, pCountry
->buffer
);
137 offset
+= pCountry
->length
;
140 rtl_string_release( pCountry
);
143 /* convert variant to ascii - check if there is enough space for the variant string */
144 if( pLocale
->Variant
&& pLocale
->Variant
->length
&&
145 ( pLocale
->Variant
->length
< n
- 6 ) )
147 rtl_String
*pVariant
= NULL
;
149 rtl_uString2String( &pVariant
,
150 pLocale
->Variant
->buffer
, pLocale
->Variant
->length
,
151 RTL_TEXTENCODING_ASCII_US
, OUSTRING_TO_OSTRING_CVTFLAGS
);
153 if( offset
+ pVariant
->length
+ 1 < n
)
155 strcpy( buffer
+ offset
, pVariant
->buffer
);
156 offset
+= pVariant
->length
;
159 rtl_string_release( pVariant
);
168 /*****************************************************************************
169 convert locale string to rtl_Locale
170 *****************************************************************************/
172 static rtl_Locale
* _parse_locale( const char * locale
)
174 static sal_Unicode c_locale
[2] = { (sal_Unicode
) 'C', 0 };
176 /* check if locale contains a valid string */
179 size_t len
= strlen( locale
);
183 rtl_uString
* pLanguage
= NULL
;
184 rtl_uString
* pCountry
= NULL
;
185 rtl_uString
* pVariant
= NULL
;
189 /* convert language code to unicode */
190 rtl_string2UString( &pLanguage
, locale
, 2, RTL_TEXTENCODING_ASCII_US
, OSTRING_TO_OUSTRING_CVTFLAGS
);
191 OSL_ASSERT(pLanguage
!= NULL
);
193 /* convert country code to unicode */
194 if( len
>= 5 && '_' == locale
[2] )
196 rtl_string2UString( &pCountry
, locale
+ 3, 2, RTL_TEXTENCODING_ASCII_US
, OSTRING_TO_OUSTRING_CVTFLAGS
);
197 OSL_ASSERT(pCountry
!= NULL
);
201 /* convert variant code to unicode - do not rely on "." as delimiter */
203 rtl_string2UString( &pVariant
, locale
+ offset
, len
- offset
, RTL_TEXTENCODING_ASCII_US
, OSTRING_TO_OUSTRING_CVTFLAGS
);
204 OSL_ASSERT(pVariant
!= NULL
);
207 rtl_Locale
* ret
= rtl_locale_register( pLanguage
->buffer
, pCountry
? pCountry
->buffer
: c_locale
+ 1, pVariant
? pVariant
->buffer
: c_locale
+ 1 );
209 if (pVariant
) rtl_uString_release(pVariant
);
210 if (pCountry
) rtl_uString_release(pCountry
);
211 if (pLanguage
) rtl_uString_release(pLanguage
);
216 return rtl_locale_register( c_locale
, c_locale
+ 1, c_locale
+ 1 );
/*
 * This implementation of osl_getTextEncodingFromLocale maps
 * from the ISO language codes.
 */
227 const _pair _full_locale_list
[] = {
228 { "ja_JP.eucJP", RTL_TEXTENCODING_EUC_JP
},
229 { "ja_JP.EUC", RTL_TEXTENCODING_EUC_JP
},
230 { "ko_KR.EUC", RTL_TEXTENCODING_EUC_KR
},
231 { "zh_CN.EUC", RTL_TEXTENCODING_EUC_CN
},
232 { "zh_TW.EUC", RTL_TEXTENCODING_EUC_TW
}
235 const _pair _locale_extension_list
[] = {
236 { "big5", RTL_TEXTENCODING_BIG5
},
237 { "big5hk", RTL_TEXTENCODING_BIG5_HKSCS
},
238 { "gb18030", RTL_TEXTENCODING_GB_18030
},
239 { "euc", RTL_TEXTENCODING_EUC_JP
},
240 { "iso8859-1", RTL_TEXTENCODING_ISO_8859_1
},
241 { "iso8859-10", RTL_TEXTENCODING_ISO_8859_10
},
242 { "iso8859-13", RTL_TEXTENCODING_ISO_8859_13
},
243 { "iso8859-14", RTL_TEXTENCODING_ISO_8859_14
},
244 { "iso8859-15", RTL_TEXTENCODING_ISO_8859_15
},
245 { "iso8859-2", RTL_TEXTENCODING_ISO_8859_2
},
246 { "iso8859-3", RTL_TEXTENCODING_ISO_8859_3
},
247 { "iso8859-4", RTL_TEXTENCODING_ISO_8859_4
},
248 { "iso8859-5", RTL_TEXTENCODING_ISO_8859_5
},
249 { "iso8859-6", RTL_TEXTENCODING_ISO_8859_6
},
250 { "iso8859-7", RTL_TEXTENCODING_ISO_8859_7
},
251 { "iso8859-8", RTL_TEXTENCODING_ISO_8859_8
},
252 { "iso8859-9", RTL_TEXTENCODING_ISO_8859_9
},
253 { "koi8-r", RTL_TEXTENCODING_KOI8_R
},
254 { "koi8-u", RTL_TEXTENCODING_KOI8_U
},
255 { "pck", RTL_TEXTENCODING_MS_932
},
257 { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW
},
259 { "utf-16", RTL_TEXTENCODING_UNICODE
},
260 { "utf-7", RTL_TEXTENCODING_UTF7
},
261 { "utf-8", RTL_TEXTENCODING_UTF8
}
264 const _pair _iso_language_list
[] = {
265 { "af", RTL_TEXTENCODING_ISO_8859_1
},
266 { "ar", RTL_TEXTENCODING_ISO_8859_6
},
267 { "az", RTL_TEXTENCODING_ISO_8859_9
},
268 { "be", RTL_TEXTENCODING_ISO_8859_5
},
269 { "bg", RTL_TEXTENCODING_ISO_8859_5
},
270 { "ca", RTL_TEXTENCODING_ISO_8859_1
},
271 { "cs", RTL_TEXTENCODING_ISO_8859_2
},
272 { "da", RTL_TEXTENCODING_ISO_8859_1
},
273 { "de", RTL_TEXTENCODING_ISO_8859_1
},
274 { "el", RTL_TEXTENCODING_ISO_8859_7
},
275 { "en", RTL_TEXTENCODING_ISO_8859_1
},
276 { "es", RTL_TEXTENCODING_ISO_8859_1
},
277 { "et", RTL_TEXTENCODING_ISO_8859_4
},
278 { "eu", RTL_TEXTENCODING_ISO_8859_1
},
279 { "fa", RTL_TEXTENCODING_ISO_8859_6
},
280 { "fi", RTL_TEXTENCODING_ISO_8859_1
},
281 { "fo", RTL_TEXTENCODING_ISO_8859_1
},
282 { "fr", RTL_TEXTENCODING_ISO_8859_1
},
283 { "gr", RTL_TEXTENCODING_ISO_8859_7
},
284 { "he", RTL_TEXTENCODING_ISO_8859_8
},
285 { "hi", RTL_TEXTENCODING_DONTKNOW
},
286 { "hr", RTL_TEXTENCODING_ISO_8859_2
},
287 { "hu", RTL_TEXTENCODING_ISO_8859_2
},
288 { "hy", RTL_TEXTENCODING_DONTKNOW
},
289 { "id", RTL_TEXTENCODING_ISO_8859_1
},
290 { "is", RTL_TEXTENCODING_ISO_8859_1
},
291 { "it", RTL_TEXTENCODING_ISO_8859_1
},
292 { "iw", RTL_TEXTENCODING_ISO_8859_8
},
293 { "ja", RTL_TEXTENCODING_EUC_JP
},
294 { "ka", RTL_TEXTENCODING_DONTKNOW
},
295 { "kk", RTL_TEXTENCODING_ISO_8859_5
},
296 { "ko", RTL_TEXTENCODING_EUC_KR
},
297 { "lt", RTL_TEXTENCODING_ISO_8859_4
},
298 { "lv", RTL_TEXTENCODING_ISO_8859_4
},
299 { "mk", RTL_TEXTENCODING_ISO_8859_5
},
300 { "mr", RTL_TEXTENCODING_DONTKNOW
},
301 { "ms", RTL_TEXTENCODING_ISO_8859_1
},
302 { "nl", RTL_TEXTENCODING_ISO_8859_1
},
303 { "no", RTL_TEXTENCODING_ISO_8859_1
},
304 { "pl", RTL_TEXTENCODING_ISO_8859_2
},
305 { "pt", RTL_TEXTENCODING_ISO_8859_1
},
306 { "ro", RTL_TEXTENCODING_ISO_8859_2
},
307 { "ru", RTL_TEXTENCODING_ISO_8859_5
},
308 { "sa", RTL_TEXTENCODING_DONTKNOW
},
309 { "sk", RTL_TEXTENCODING_ISO_8859_2
},
310 { "sl", RTL_TEXTENCODING_ISO_8859_2
},
311 { "sq", RTL_TEXTENCODING_ISO_8859_2
},
312 { "sv", RTL_TEXTENCODING_ISO_8859_1
},
313 { "sw", RTL_TEXTENCODING_ISO_8859_1
},
314 { "ta", RTL_TEXTENCODING_DONTKNOW
},
315 { "th", RTL_TEXTENCODING_DONTKNOW
},
316 { "tr", RTL_TEXTENCODING_ISO_8859_9
},
317 { "tt", RTL_TEXTENCODING_ISO_8859_5
},
318 { "uk", RTL_TEXTENCODING_ISO_8859_5
},
319 { "ur", RTL_TEXTENCODING_ISO_8859_6
},
320 { "uz", RTL_TEXTENCODING_ISO_8859_9
},
321 { "vi", RTL_TEXTENCODING_DONTKNOW
},
322 { "zh", RTL_TEXTENCODING_BIG5
}
325 /*****************************************************************************
326 return the text encoding corresponding to the given locale
327 *****************************************************************************/
329 rtl_TextEncoding
osl_getTextEncodingFromLocale( rtl_Locale
* pLocale
)
331 const _pair
*language
= 0;
332 char locale_buf
[64] = "";
335 WinMessageBox(HWND_DESKTOP
,HWND_DESKTOP
,
336 "Please contact technical support and report above informations.\n\n",
337 "Critical error: osl_getTextEncodingFromLocale",
338 0, MB_ERROR
| MB_OK
| MB_MOVEABLE
);
340 /* default to process locale if pLocale == NULL */
341 if( NULL
== pLocale
)
342 osl_getProcessLocale( &pLocale
);
344 /* convert rtl_Locale to locale string */
345 if( _compose_locale( pLocale
, locale_buf
, 64 ) )
347 /* check special handling list (EUC) first */
348 const unsigned int members
= sizeof( _full_locale_list
) / sizeof( _pair
);
349 language
= _pair_search( locale_buf
, _full_locale_list
, members
);
351 if( NULL
== language
)
354 * check if there is a charset qualifier at the end of the given locale string
355 * e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what
358 cp
= strrchr( locale_buf
, '.' );
362 const unsigned int members
= sizeof( _locale_extension_list
) / sizeof( _pair
);
363 language
= _pair_search( cp
+ 1, _locale_extension_list
, members
);
367 /* use iso language code to determine the charset */
368 if( NULL
== language
)
370 const unsigned int members
= sizeof( _iso_language_list
) / sizeof( _pair
);
372 /* iso lang codes have 2 charaters */
373 locale_buf
[2] = '\0';
375 language
= _pair_search( locale_buf
, _iso_language_list
, members
);
379 /* a matching item in our list provides a mapping from codeset to
381 if ( language
!= NULL
)
382 return language
->value
;
384 return RTL_TEXTENCODING_DONTKNOW
;
387 /*****************************************************************************
388 return the current process locale
389 *****************************************************************************/
391 void _imp_getProcessLocale( rtl_Locale
** ppLocale
)
393 /* simulate behavior off setlocale */
394 char * locale
= getenv( "LC_ALL" );
397 locale
= getenv( "LC_CTYPE" );
400 locale
= getenv( "LANG" );
405 *ppLocale
= _parse_locale( locale
);
408 /*****************************************************************************
409 set the current process locale
410 *****************************************************************************/
412 int _imp_setProcessLocale( rtl_Locale
* pLocale
)
419 /* convert rtl_Locale to locale string */
420 if( NULL
!= _compose_locale( pLocale
, locale_buf
, 64 ) )
422 /* only change env vars that exist already */
423 if( getenv( "LC_ALL" ) ) {
425 snprintf(env_buf
, sizeof(env_buf
), "LC_ALL=%s", locale_buf
);
426 env_buf
[sizeof(env_buf
)] = '\0';
428 #elif defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
429 setenv( "LC_ALL", locale_buf
, 1);
431 setenv( "LC_ALL", locale_buf
);
435 if( getenv( "LC_CTYPE" ) ) {
437 snprintf(env_buf
, sizeof(env_buf
), "LC_CTYPE=%s", locale_buf
);
438 env_buf
[sizeof(env_buf
)] = '\0';
440 #elif defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
441 setenv("LC_CTYPE", locale_buf
, 1 );
443 setenv( "LC_CTYPE", locale_buf
);
447 if( getenv( "LANG" ) ) {
449 snprintf(env_buf
, sizeof(env_buf
), "LANG=%s", locale_buf
);
450 env_buf
[sizeof(env_buf
)] = '\0';
452 #elif defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
453 setenv("LC_CTYPE", locale_buf
, 1 );
455 setenv( "LANG", locale_buf
);