merged tag ooo/DEV300_m102
[LibreOffice.git] / sal / osl / os2 / nlsupport.c
blobab00443e57d7ea96ccbbed5a59e9287247f83d5a
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 #define INCL_WIN
29 #include "svpm.h"
31 #include <osl/nlsupport.h>
32 #include <osl/diagnose.h>
33 #include <osl/process.h>
34 #include <rtl/memory.h>
36 #include <string.h>
38 /*****************************************************************************
39 typedefs
40 *****************************************************************************/
43 typedef struct {
44 const char *key;
45 const rtl_TextEncoding value;
46 } _pair;
49 /*****************************************************************************
50 compare function for binary search
51 *****************************************************************************/
53 static int
54 _pair_compare (const char *key, const _pair *pair)
56 int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
57 return result;
60 /*****************************************************************************
61 binary search on encoding tables
62 *****************************************************************************/
64 static const _pair*
65 _pair_search (const char *key, const _pair *base, unsigned int member )
67 unsigned int lower = 0;
68 unsigned int upper = member;
69 unsigned int current;
70 int comparison;
72 /* check for validity of input */
73 if ( (key == NULL) || (base == NULL) || (member == 0) )
74 return NULL;
76 /* binary search */
77 while ( lower < upper )
79 current = (lower + upper) / 2;
80 comparison = _pair_compare( key, base + current );
81 if (comparison < 0)
82 upper = current;
83 else if (comparison > 0)
84 lower = current + 1;
85 else
86 return base + current;
89 return NULL;
93 /*****************************************************************************
94 convert rtl_Locale to locale string
95 *****************************************************************************/
97 static char * _compose_locale( rtl_Locale * pLocale, char * buffer, size_t n )
99 /* check if a valid locale is specified */
100 if( pLocale && pLocale->Language && (pLocale->Language->length == 2) )
102 size_t offset = 0;
104 /* convert language code to ascii */
106 rtl_String *pLanguage = NULL;
108 rtl_uString2String( &pLanguage,
109 pLocale->Language->buffer, pLocale->Language->length,
110 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
112 if( pLanguage->length < n )
114 strcpy( buffer, pLanguage->buffer );
115 offset = pLanguage->length;
118 rtl_string_release( pLanguage );
121 /* convert country code to ascii */
122 if( pLocale->Country && (pLocale->Country->length == 2) )
124 rtl_String *pCountry = NULL;
126 rtl_uString2String( &pCountry,
127 pLocale->Country->buffer, pLocale->Country->length,
128 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
130 if( offset + pCountry->length + 1 < n )
132 strcpy( buffer + offset++, "_" );
133 strcpy( buffer + offset, pCountry->buffer );
134 offset += pCountry->length;
137 rtl_string_release( pCountry );
140 /* convert variant to ascii - check if there is enough space for the variant string */
141 if( pLocale->Variant && pLocale->Variant->length &&
142 ( pLocale->Variant->length < n - 6 ) )
144 rtl_String *pVariant = NULL;
146 rtl_uString2String( &pVariant,
147 pLocale->Variant->buffer, pLocale->Variant->length,
148 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
150 if( offset + pVariant->length + 1 < n )
152 strcpy( buffer + offset, pVariant->buffer );
153 offset += pVariant->length;
156 rtl_string_release( pVariant );
159 return buffer;
162 return NULL;
165 /*****************************************************************************
166 convert locale string to rtl_Locale
167 *****************************************************************************/
169 static rtl_Locale * _parse_locale( const char * locale )
171 static sal_Unicode c_locale[2] = { (sal_Unicode) 'C', 0 };
173 /* check if locale contains a valid string */
174 if( locale )
176 size_t len = strlen( locale );
178 if( len >= 2 )
180 rtl_uString * pLanguage = NULL;
181 rtl_uString * pCountry = NULL;
182 rtl_uString * pVariant = NULL;
184 size_t offset = 2;
186 /* convert language code to unicode */
187 rtl_string2UString( &pLanguage, locale, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
188 OSL_ASSERT(pLanguage != NULL);
190 /* convert country code to unicode */
191 if( len >= 5 && '_' == locale[2] )
193 rtl_string2UString( &pCountry, locale + 3, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
194 OSL_ASSERT(pCountry != NULL);
195 offset = 5;
198 /* convert variant code to unicode - do not rely on "." as delimiter */
199 if( len > offset ) {
200 rtl_string2UString( &pVariant, locale + offset, len - offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
201 OSL_ASSERT(pVariant != NULL);
204 rtl_Locale * ret = rtl_locale_register( pLanguage->buffer, pCountry ? pCountry->buffer : c_locale + 1, pVariant ? pVariant->buffer : c_locale + 1 );
206 if (pVariant) rtl_uString_release(pVariant);
207 if (pCountry) rtl_uString_release(pCountry);
208 if (pLanguage) rtl_uString_release(pLanguage);
210 return ret;
212 else
213 return rtl_locale_register( c_locale, c_locale + 1, c_locale + 1 );
216 return NULL;
/*
 * This implementation of osl_getTextEncodingFromLocale maps
 * from the ISO language codes.
 */
224 const _pair _full_locale_list[] = {
225 { "ja_JP.eucJP", RTL_TEXTENCODING_EUC_JP },
226 { "ja_JP.EUC", RTL_TEXTENCODING_EUC_JP },
227 { "ko_KR.EUC", RTL_TEXTENCODING_EUC_KR },
228 { "zh_CN.EUC", RTL_TEXTENCODING_EUC_CN },
229 { "zh_TW.EUC", RTL_TEXTENCODING_EUC_TW }
232 const _pair _locale_extension_list[] = {
233 { "big5", RTL_TEXTENCODING_BIG5 },
234 { "big5hk", RTL_TEXTENCODING_BIG5_HKSCS },
235 { "gb18030", RTL_TEXTENCODING_GB_18030 },
236 { "euc", RTL_TEXTENCODING_EUC_JP },
237 { "iso8859-1", RTL_TEXTENCODING_ISO_8859_1 },
238 { "iso8859-10", RTL_TEXTENCODING_ISO_8859_10 },
239 { "iso8859-13", RTL_TEXTENCODING_ISO_8859_13 },
240 { "iso8859-14", RTL_TEXTENCODING_ISO_8859_14 },
241 { "iso8859-15", RTL_TEXTENCODING_ISO_8859_15 },
242 { "iso8859-2", RTL_TEXTENCODING_ISO_8859_2 },
243 { "iso8859-3", RTL_TEXTENCODING_ISO_8859_3 },
244 { "iso8859-4", RTL_TEXTENCODING_ISO_8859_4 },
245 { "iso8859-5", RTL_TEXTENCODING_ISO_8859_5 },
246 { "iso8859-6", RTL_TEXTENCODING_ISO_8859_6 },
247 { "iso8859-7", RTL_TEXTENCODING_ISO_8859_7 },
248 { "iso8859-8", RTL_TEXTENCODING_ISO_8859_8 },
249 { "iso8859-9", RTL_TEXTENCODING_ISO_8859_9 },
250 { "koi8-r", RTL_TEXTENCODING_KOI8_R },
251 { "koi8-u", RTL_TEXTENCODING_KOI8_U },
252 { "pck", RTL_TEXTENCODING_MS_932 },
253 #if (0)
254 { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW },
255 #endif
256 { "utf-16", RTL_TEXTENCODING_UNICODE },
257 { "utf-7", RTL_TEXTENCODING_UTF7 },
258 { "utf-8", RTL_TEXTENCODING_UTF8 }
261 const _pair _iso_language_list[] = {
262 { "af", RTL_TEXTENCODING_ISO_8859_1 },
263 { "ar", RTL_TEXTENCODING_ISO_8859_6 },
264 { "az", RTL_TEXTENCODING_ISO_8859_9 },
265 { "be", RTL_TEXTENCODING_ISO_8859_5 },
266 { "bg", RTL_TEXTENCODING_ISO_8859_5 },
267 { "ca", RTL_TEXTENCODING_ISO_8859_1 },
268 { "cs", RTL_TEXTENCODING_ISO_8859_2 },
269 { "da", RTL_TEXTENCODING_ISO_8859_1 },
270 { "de", RTL_TEXTENCODING_ISO_8859_1 },
271 { "el", RTL_TEXTENCODING_ISO_8859_7 },
272 { "en", RTL_TEXTENCODING_ISO_8859_1 },
273 { "es", RTL_TEXTENCODING_ISO_8859_1 },
274 { "et", RTL_TEXTENCODING_ISO_8859_4 },
275 { "eu", RTL_TEXTENCODING_ISO_8859_1 },
276 { "fa", RTL_TEXTENCODING_ISO_8859_6 },
277 { "fi", RTL_TEXTENCODING_ISO_8859_1 },
278 { "fo", RTL_TEXTENCODING_ISO_8859_1 },
279 { "fr", RTL_TEXTENCODING_ISO_8859_1 },
280 { "gr", RTL_TEXTENCODING_ISO_8859_7 },
281 { "he", RTL_TEXTENCODING_ISO_8859_8 },
282 { "hi", RTL_TEXTENCODING_DONTKNOW },
283 { "hr", RTL_TEXTENCODING_ISO_8859_2 },
284 { "hu", RTL_TEXTENCODING_ISO_8859_2 },
285 { "hy", RTL_TEXTENCODING_DONTKNOW },
286 { "id", RTL_TEXTENCODING_ISO_8859_1 },
287 { "is", RTL_TEXTENCODING_ISO_8859_1 },
288 { "it", RTL_TEXTENCODING_ISO_8859_1 },
289 { "iw", RTL_TEXTENCODING_ISO_8859_8 },
290 { "ja", RTL_TEXTENCODING_EUC_JP },
291 { "ka", RTL_TEXTENCODING_DONTKNOW },
292 { "kk", RTL_TEXTENCODING_ISO_8859_5 },
293 { "ko", RTL_TEXTENCODING_EUC_KR },
294 { "lt", RTL_TEXTENCODING_ISO_8859_4 },
295 { "lv", RTL_TEXTENCODING_ISO_8859_4 },
296 { "mk", RTL_TEXTENCODING_ISO_8859_5 },
297 { "mr", RTL_TEXTENCODING_DONTKNOW },
298 { "ms", RTL_TEXTENCODING_ISO_8859_1 },
299 { "nl", RTL_TEXTENCODING_ISO_8859_1 },
300 { "no", RTL_TEXTENCODING_ISO_8859_1 },
301 { "pl", RTL_TEXTENCODING_ISO_8859_2 },
302 { "pt", RTL_TEXTENCODING_ISO_8859_1 },
303 { "ro", RTL_TEXTENCODING_ISO_8859_2 },
304 { "ru", RTL_TEXTENCODING_ISO_8859_5 },
305 { "sa", RTL_TEXTENCODING_DONTKNOW },
306 { "sk", RTL_TEXTENCODING_ISO_8859_2 },
307 { "sl", RTL_TEXTENCODING_ISO_8859_2 },
308 { "sq", RTL_TEXTENCODING_ISO_8859_2 },
309 { "sv", RTL_TEXTENCODING_ISO_8859_1 },
310 { "sw", RTL_TEXTENCODING_ISO_8859_1 },
311 { "ta", RTL_TEXTENCODING_DONTKNOW },
312 { "th", RTL_TEXTENCODING_DONTKNOW },
313 { "tr", RTL_TEXTENCODING_ISO_8859_9 },
314 { "tt", RTL_TEXTENCODING_ISO_8859_5 },
315 { "uk", RTL_TEXTENCODING_ISO_8859_5 },
316 { "ur", RTL_TEXTENCODING_ISO_8859_6 },
317 { "uz", RTL_TEXTENCODING_ISO_8859_9 },
318 { "vi", RTL_TEXTENCODING_DONTKNOW },
319 { "zh", RTL_TEXTENCODING_BIG5 }
322 /*****************************************************************************
323 return the text encoding corresponding to the given locale
324 *****************************************************************************/
326 rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale )
328 const _pair *language = 0;
329 char locale_buf[64] = "";
330 char *cp;
332 WinMessageBox(HWND_DESKTOP,HWND_DESKTOP,
333 "Please contact technical support and report above informations.\n\n",
334 "Critical error: osl_getTextEncodingFromLocale",
335 0, MB_ERROR | MB_OK | MB_MOVEABLE);
337 /* default to process locale if pLocale == NULL */
338 if( NULL == pLocale )
339 osl_getProcessLocale( &pLocale );
341 /* convert rtl_Locale to locale string */
342 if( _compose_locale( pLocale, locale_buf, 64 ) )
344 /* check special handling list (EUC) first */
345 const unsigned int members = sizeof( _full_locale_list ) / sizeof( _pair );
346 language = _pair_search( locale_buf, _full_locale_list, members);
348 if( NULL == language )
351 * check if there is a charset qualifier at the end of the given locale string
352 * e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what
353 * charset to use
355 cp = strrchr( locale_buf, '.' );
357 if( NULL != cp )
359 const unsigned int members = sizeof( _locale_extension_list ) / sizeof( _pair );
360 language = _pair_search( cp + 1, _locale_extension_list, members);
364 /* use iso language code to determine the charset */
365 if( NULL == language )
367 const unsigned int members = sizeof( _iso_language_list ) / sizeof( _pair );
369 /* iso lang codes have 2 charaters */
370 locale_buf[2] = '\0';
372 language = _pair_search( locale_buf, _iso_language_list, members);
376 /* a matching item in our list provides a mapping from codeset to
377 * rtl-codeset */
378 if ( language != NULL )
379 return language->value;
381 return RTL_TEXTENCODING_DONTKNOW;
384 /*****************************************************************************
385 return the current process locale
386 *****************************************************************************/
388 void _imp_getProcessLocale( rtl_Locale ** ppLocale )
390 /* simulate behavior off setlocale */
391 char * locale = getenv( "LC_ALL" );
393 if( NULL == locale )
394 locale = getenv( "LC_CTYPE" );
396 if( NULL == locale )
397 locale = getenv( "LANG" );
399 if( NULL == locale )
400 locale = "C";
402 *ppLocale = _parse_locale( locale );
405 /*****************************************************************************
406 set the current process locale
407 *****************************************************************************/
409 int _imp_setProcessLocale( rtl_Locale * pLocale )
411 char locale_buf[64];
413 /* convert rtl_Locale to locale string */
414 if( NULL != _compose_locale( pLocale, locale_buf, 64 ) )
416 /* only change env vars that exist already */
417 if( getenv( "LC_ALL" ) ) {
418 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
419 setenv( "LC_ALL", locale_buf, 1);
420 #else
421 setenv( "LC_ALL", locale_buf );
422 #endif
425 if( getenv( "LC_CTYPE" ) ) {
426 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
427 setenv("LC_CTYPE", locale_buf, 1 );
428 #else
429 setenv( "LC_CTYPE", locale_buf );
430 #endif
433 if( getenv( "LANG" ) ) {
434 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
435 setenv("LC_CTYPE", locale_buf, 1 );
436 #else
437 setenv( "LANG", locale_buf );
438 #endif
442 return 0;