update dev300-m57
[ooovba.git] / sal / osl / os2 / nlsupport.c
blob7fb63981d9bcd6965f6d57bd1e5538cb909dfdfa
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: nlsupport.c,v $
10 * $Revision: 1.5 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #define INCL_WIN
32 #include "svpm.h"
34 #include <osl/nlsupport.h>
35 #include <osl/diagnose.h>
36 #include <osl/process.h>
37 #include <rtl/memory.h>
39 #include <string.h>
41 /*****************************************************************************
42 typedefs
43 *****************************************************************************/
46 typedef struct {
47 const char *key;
48 const rtl_TextEncoding value;
49 } _pair;
52 /*****************************************************************************
53 compare function for binary search
54 *****************************************************************************/
56 static int
57 _pair_compare (const char *key, const _pair *pair)
59 int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
60 return result;
63 /*****************************************************************************
64 binary search on encoding tables
65 *****************************************************************************/
67 static const _pair*
68 _pair_search (const char *key, const _pair *base, unsigned int member )
70 unsigned int lower = 0;
71 unsigned int upper = member;
72 unsigned int current;
73 int comparison;
75 /* check for validity of input */
76 if ( (key == NULL) || (base == NULL) || (member == 0) )
77 return NULL;
79 /* binary search */
80 while ( lower < upper )
82 current = (lower + upper) / 2;
83 comparison = _pair_compare( key, base + current );
84 if (comparison < 0)
85 upper = current;
86 else if (comparison > 0)
87 lower = current + 1;
88 else
89 return base + current;
92 return NULL;
96 /*****************************************************************************
97 convert rtl_Locale to locale string
98 *****************************************************************************/
100 static char * _compose_locale( rtl_Locale * pLocale, char * buffer, size_t n )
102 /* check if a valid locale is specified */
103 if( pLocale && pLocale->Language && (pLocale->Language->length == 2) )
105 size_t offset = 0;
107 /* convert language code to ascii */
109 rtl_String *pLanguage = NULL;
111 rtl_uString2String( &pLanguage,
112 pLocale->Language->buffer, pLocale->Language->length,
113 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
115 if( pLanguage->length < n )
117 strcpy( buffer, pLanguage->buffer );
118 offset = pLanguage->length;
121 rtl_string_release( pLanguage );
124 /* convert country code to ascii */
125 if( pLocale->Country && (pLocale->Country->length == 2) )
127 rtl_String *pCountry = NULL;
129 rtl_uString2String( &pCountry,
130 pLocale->Country->buffer, pLocale->Country->length,
131 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
133 if( offset + pCountry->length + 1 < n )
135 strcpy( buffer + offset++, "_" );
136 strcpy( buffer + offset, pCountry->buffer );
137 offset += pCountry->length;
140 rtl_string_release( pCountry );
143 /* convert variant to ascii - check if there is enough space for the variant string */
144 if( pLocale->Variant && pLocale->Variant->length &&
145 ( pLocale->Variant->length < n - 6 ) )
147 rtl_String *pVariant = NULL;
149 rtl_uString2String( &pVariant,
150 pLocale->Variant->buffer, pLocale->Variant->length,
151 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
153 if( offset + pVariant->length + 1 < n )
155 strcpy( buffer + offset, pVariant->buffer );
156 offset += pVariant->length;
159 rtl_string_release( pVariant );
162 return buffer;
165 return NULL;
168 /*****************************************************************************
169 convert locale string to rtl_Locale
170 *****************************************************************************/
172 static rtl_Locale * _parse_locale( const char * locale )
174 static sal_Unicode c_locale[2] = { (sal_Unicode) 'C', 0 };
176 /* check if locale contains a valid string */
177 if( locale )
179 size_t len = strlen( locale );
181 if( len >= 2 )
183 rtl_uString * pLanguage = NULL;
184 rtl_uString * pCountry = NULL;
185 rtl_uString * pVariant = NULL;
187 size_t offset = 2;
189 /* convert language code to unicode */
190 rtl_string2UString( &pLanguage, locale, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
191 OSL_ASSERT(pLanguage != NULL);
193 /* convert country code to unicode */
194 if( len >= 5 && '_' == locale[2] )
196 rtl_string2UString( &pCountry, locale + 3, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
197 OSL_ASSERT(pCountry != NULL);
198 offset = 5;
201 /* convert variant code to unicode - do not rely on "." as delimiter */
202 if( len > offset ) {
203 rtl_string2UString( &pVariant, locale + offset, len - offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
204 OSL_ASSERT(pVariant != NULL);
207 rtl_Locale * ret = rtl_locale_register( pLanguage->buffer, pCountry ? pCountry->buffer : c_locale + 1, pVariant ? pVariant->buffer : c_locale + 1 );
209 if (pVariant) rtl_uString_release(pVariant);
210 if (pCountry) rtl_uString_release(pCountry);
211 if (pLanguage) rtl_uString_release(pLanguage);
213 return ret;
215 else
216 return rtl_locale_register( c_locale, c_locale + 1, c_locale + 1 );
219 return NULL;
/*
 * This implementation of osl_getTextEncodingFromLocale maps
 * from the ISO language codes.
 */
227 const _pair _full_locale_list[] = {
228 { "ja_JP.eucJP", RTL_TEXTENCODING_EUC_JP },
229 { "ja_JP.EUC", RTL_TEXTENCODING_EUC_JP },
230 { "ko_KR.EUC", RTL_TEXTENCODING_EUC_KR },
231 { "zh_CN.EUC", RTL_TEXTENCODING_EUC_CN },
232 { "zh_TW.EUC", RTL_TEXTENCODING_EUC_TW }
235 const _pair _locale_extension_list[] = {
236 { "big5", RTL_TEXTENCODING_BIG5 },
237 { "big5hk", RTL_TEXTENCODING_BIG5_HKSCS },
238 { "gb18030", RTL_TEXTENCODING_GB_18030 },
239 { "euc", RTL_TEXTENCODING_EUC_JP },
240 { "iso8859-1", RTL_TEXTENCODING_ISO_8859_1 },
241 { "iso8859-10", RTL_TEXTENCODING_ISO_8859_10 },
242 { "iso8859-13", RTL_TEXTENCODING_ISO_8859_13 },
243 { "iso8859-14", RTL_TEXTENCODING_ISO_8859_14 },
244 { "iso8859-15", RTL_TEXTENCODING_ISO_8859_15 },
245 { "iso8859-2", RTL_TEXTENCODING_ISO_8859_2 },
246 { "iso8859-3", RTL_TEXTENCODING_ISO_8859_3 },
247 { "iso8859-4", RTL_TEXTENCODING_ISO_8859_4 },
248 { "iso8859-5", RTL_TEXTENCODING_ISO_8859_5 },
249 { "iso8859-6", RTL_TEXTENCODING_ISO_8859_6 },
250 { "iso8859-7", RTL_TEXTENCODING_ISO_8859_7 },
251 { "iso8859-8", RTL_TEXTENCODING_ISO_8859_8 },
252 { "iso8859-9", RTL_TEXTENCODING_ISO_8859_9 },
253 { "koi8-r", RTL_TEXTENCODING_KOI8_R },
254 { "koi8-u", RTL_TEXTENCODING_KOI8_U },
255 { "pck", RTL_TEXTENCODING_MS_932 },
256 #if (0)
257 { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW },
258 #endif
259 { "utf-16", RTL_TEXTENCODING_UNICODE },
260 { "utf-7", RTL_TEXTENCODING_UTF7 },
261 { "utf-8", RTL_TEXTENCODING_UTF8 }
264 const _pair _iso_language_list[] = {
265 { "af", RTL_TEXTENCODING_ISO_8859_1 },
266 { "ar", RTL_TEXTENCODING_ISO_8859_6 },
267 { "az", RTL_TEXTENCODING_ISO_8859_9 },
268 { "be", RTL_TEXTENCODING_ISO_8859_5 },
269 { "bg", RTL_TEXTENCODING_ISO_8859_5 },
270 { "ca", RTL_TEXTENCODING_ISO_8859_1 },
271 { "cs", RTL_TEXTENCODING_ISO_8859_2 },
272 { "da", RTL_TEXTENCODING_ISO_8859_1 },
273 { "de", RTL_TEXTENCODING_ISO_8859_1 },
274 { "el", RTL_TEXTENCODING_ISO_8859_7 },
275 { "en", RTL_TEXTENCODING_ISO_8859_1 },
276 { "es", RTL_TEXTENCODING_ISO_8859_1 },
277 { "et", RTL_TEXTENCODING_ISO_8859_4 },
278 { "eu", RTL_TEXTENCODING_ISO_8859_1 },
279 { "fa", RTL_TEXTENCODING_ISO_8859_6 },
280 { "fi", RTL_TEXTENCODING_ISO_8859_1 },
281 { "fo", RTL_TEXTENCODING_ISO_8859_1 },
282 { "fr", RTL_TEXTENCODING_ISO_8859_1 },
283 { "gr", RTL_TEXTENCODING_ISO_8859_7 },
284 { "he", RTL_TEXTENCODING_ISO_8859_8 },
285 { "hi", RTL_TEXTENCODING_DONTKNOW },
286 { "hr", RTL_TEXTENCODING_ISO_8859_2 },
287 { "hu", RTL_TEXTENCODING_ISO_8859_2 },
288 { "hy", RTL_TEXTENCODING_DONTKNOW },
289 { "id", RTL_TEXTENCODING_ISO_8859_1 },
290 { "is", RTL_TEXTENCODING_ISO_8859_1 },
291 { "it", RTL_TEXTENCODING_ISO_8859_1 },
292 { "iw", RTL_TEXTENCODING_ISO_8859_8 },
293 { "ja", RTL_TEXTENCODING_EUC_JP },
294 { "ka", RTL_TEXTENCODING_DONTKNOW },
295 { "kk", RTL_TEXTENCODING_ISO_8859_5 },
296 { "ko", RTL_TEXTENCODING_EUC_KR },
297 { "lt", RTL_TEXTENCODING_ISO_8859_4 },
298 { "lv", RTL_TEXTENCODING_ISO_8859_4 },
299 { "mk", RTL_TEXTENCODING_ISO_8859_5 },
300 { "mr", RTL_TEXTENCODING_DONTKNOW },
301 { "ms", RTL_TEXTENCODING_ISO_8859_1 },
302 { "nl", RTL_TEXTENCODING_ISO_8859_1 },
303 { "no", RTL_TEXTENCODING_ISO_8859_1 },
304 { "pl", RTL_TEXTENCODING_ISO_8859_2 },
305 { "pt", RTL_TEXTENCODING_ISO_8859_1 },
306 { "ro", RTL_TEXTENCODING_ISO_8859_2 },
307 { "ru", RTL_TEXTENCODING_ISO_8859_5 },
308 { "sa", RTL_TEXTENCODING_DONTKNOW },
309 { "sk", RTL_TEXTENCODING_ISO_8859_2 },
310 { "sl", RTL_TEXTENCODING_ISO_8859_2 },
311 { "sq", RTL_TEXTENCODING_ISO_8859_2 },
312 { "sv", RTL_TEXTENCODING_ISO_8859_1 },
313 { "sw", RTL_TEXTENCODING_ISO_8859_1 },
314 { "ta", RTL_TEXTENCODING_DONTKNOW },
315 { "th", RTL_TEXTENCODING_DONTKNOW },
316 { "tr", RTL_TEXTENCODING_ISO_8859_9 },
317 { "tt", RTL_TEXTENCODING_ISO_8859_5 },
318 { "uk", RTL_TEXTENCODING_ISO_8859_5 },
319 { "ur", RTL_TEXTENCODING_ISO_8859_6 },
320 { "uz", RTL_TEXTENCODING_ISO_8859_9 },
321 { "vi", RTL_TEXTENCODING_DONTKNOW },
322 { "zh", RTL_TEXTENCODING_BIG5 }
325 /*****************************************************************************
326 return the text encoding corresponding to the given locale
327 *****************************************************************************/
329 rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale )
331 const _pair *language = 0;
332 char locale_buf[64] = "";
333 char *cp;
335 WinMessageBox(HWND_DESKTOP,HWND_DESKTOP,
336 "Please contact technical support and report above informations.\n\n",
337 "Critical error: osl_getTextEncodingFromLocale",
338 0, MB_ERROR | MB_OK | MB_MOVEABLE);
340 /* default to process locale if pLocale == NULL */
341 if( NULL == pLocale )
342 osl_getProcessLocale( &pLocale );
344 /* convert rtl_Locale to locale string */
345 if( _compose_locale( pLocale, locale_buf, 64 ) )
347 /* check special handling list (EUC) first */
348 const unsigned int members = sizeof( _full_locale_list ) / sizeof( _pair );
349 language = _pair_search( locale_buf, _full_locale_list, members);
351 if( NULL == language )
354 * check if there is a charset qualifier at the end of the given locale string
355 * e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what
356 * charset to use
358 cp = strrchr( locale_buf, '.' );
360 if( NULL != cp )
362 const unsigned int members = sizeof( _locale_extension_list ) / sizeof( _pair );
363 language = _pair_search( cp + 1, _locale_extension_list, members);
367 /* use iso language code to determine the charset */
368 if( NULL == language )
370 const unsigned int members = sizeof( _iso_language_list ) / sizeof( _pair );
372 /* iso lang codes have 2 charaters */
373 locale_buf[2] = '\0';
375 language = _pair_search( locale_buf, _iso_language_list, members);
379 /* a matching item in our list provides a mapping from codeset to
380 * rtl-codeset */
381 if ( language != NULL )
382 return language->value;
384 return RTL_TEXTENCODING_DONTKNOW;
387 /*****************************************************************************
388 return the current process locale
389 *****************************************************************************/
391 void _imp_getProcessLocale( rtl_Locale ** ppLocale )
393 /* simulate behavior off setlocale */
394 char * locale = getenv( "LC_ALL" );
396 if( NULL == locale )
397 locale = getenv( "LC_CTYPE" );
399 if( NULL == locale )
400 locale = getenv( "LANG" );
402 if( NULL == locale )
403 locale = "C";
405 *ppLocale = _parse_locale( locale );
408 /*****************************************************************************
409 set the current process locale
410 *****************************************************************************/
412 int _imp_setProcessLocale( rtl_Locale * pLocale )
414 #ifdef IRIX
415 char env_buf[80];
416 #endif
417 char locale_buf[64];
419 /* convert rtl_Locale to locale string */
420 if( NULL != _compose_locale( pLocale, locale_buf, 64 ) )
422 /* only change env vars that exist already */
423 if( getenv( "LC_ALL" ) ) {
424 #if defined( IRIX )
425 snprintf(env_buf, sizeof(env_buf), "LC_ALL=%s", locale_buf);
426 env_buf[sizeof(env_buf)] = '\0';
427 putenv(env_buf);
428 #elif defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
429 setenv( "LC_ALL", locale_buf, 1);
430 #else
431 setenv( "LC_ALL", locale_buf );
432 #endif
435 if( getenv( "LC_CTYPE" ) ) {
436 #if defined( IRIX )
437 snprintf(env_buf, sizeof(env_buf), "LC_CTYPE=%s", locale_buf);
438 env_buf[sizeof(env_buf)] = '\0';
439 putenv(env_buf);
440 #elif defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
441 setenv("LC_CTYPE", locale_buf, 1 );
442 #else
443 setenv( "LC_CTYPE", locale_buf );
444 #endif
447 if( getenv( "LANG" ) ) {
448 #if defined( IRIX )
449 snprintf(env_buf, sizeof(env_buf), "LANG=%s", locale_buf);
450 env_buf[sizeof(env_buf)] = '\0';
451 putenv(env_buf);
452 #elif defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
453 setenv("LC_CTYPE", locale_buf, 1 );
454 #else
455 setenv( "LANG", locale_buf );
456 #endif
460 return 0;