usr/src/cmd/ldap/common/convutf8.c

   1 /*
   2  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
   3  * Use is subject to license terms.
   4  */
   5
   6 /*
   7  * The contents of this file are subject to the Netscape Public
   8  * License Version 1.1 (the "License"); you may not use this file
   9  * except in compliance with the License. You may obtain a copy of
  10  * the License at http://www.mozilla.org/NPL/
  11  *
  12  * Software distributed under the License is distributed on an "AS
  13  * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
  14  * implied. See the License for the specific language governing
  15  * rights and limitations under the License.
  16  *
  17  * The Original Code is Mozilla Communicator client code, released
  18  * March 31, 1998.
  19  *
  20  * The Initial Developer of the Original Code is Netscape
  21  * Communications Corporation. Portions created by Netscape are
  22  * Copyright (C) 1998-1999 Netscape Communications Corporation. All
  23  * Rights Reserved.
  24  *
  25  * Contributor(s):
  26  */
  27
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <locale.h>
  32 #include <ctype.h>
  33
  34 #ifndef HAVE_LIBICU
  35
  36 #ifdef SOLARIS_LDAP_CMD
  37 #include <errno.h>
  38 #include <langinfo.h>
  39 #include <iconv.h>
  40 #endif
  41
  42 #ifdef __cplusplus
  43 extern "C" {
  44 #endif
  45
  46 extern char     *ldaptool_charset;
  47 char            *ldaptool_convdir = NULL;
  48 static          int charsetset = 0;
  49 char            *ldaptool_local2UTF8( const char *src );
  50
  51 #ifdef SOLARIS_LDAP_CMD
  52 static char     *ldaptool_convert( const char *src, const char *fcode,
  53                                 const char *tcode);
  54 char            *ldaptool_UTF82local( const char *src );
  55 #endif  /* SOLARIS_LDAP_CMD */
  56
  57 #ifdef SOLARIS_LDAP_CMD
  58 /*
  59  * ICU version always returns string, unless strdup fails.
  60  * As in ICU version, in case of error strdup(src)
  61  * Usually strdup(src) will be ASCII and legal anyways.
  62  */
  63
  64 static char *
  65 ldaptool_convert( const char *src, const char *fcode,
  66                                  const char *tcode) {
  67     char        *dest, *tptr, *tmp;
  68     const char  *fptr;
  69     iconv_t     cd;
  70     size_t      ileft, oleft, ret, size;
  71
  72     if (src == NULL)
  73         return (NULL);
  74
  75     if (fcode == NULL || tcode == NULL)
  76         return (strdup(src));
  77
  78     if (strcasecmp(fcode, tcode) == 0)
  79         return (strdup(src));
  80
  81     if ((cd = iconv_open(tcode, fcode)) == (iconv_t)-1) {
  82         /* conversion table not available */
  83         return (strdup(src));
  84     }
  85
  86     ileft = strlen(src);
  87     oleft = 2 * ileft;
  88     size = oleft;
  89     ret = -1;
  90     if ((dest = (char *)malloc(size)) == NULL) {
  91         (void) iconv_close(cd);
  92         /* maybe sizeof strlen(src) memory still exists */
  93         return (strdup(src));
  94     }
  95     tptr = dest;
  96     fptr = src;
  97
  98     for (;;) {
  99         ret = iconv(cd, &fptr, &ileft, &tptr, &oleft);
 100
 101         if (ret != (size_t)-1) {
 102                 /*
 103                  * Success. Place 'cd' into its initial shift
 104                  * state before returning.
 105                  */
 106                 if (fptr == NULL) /* already in initial state  */
 107                         break;
 108                 fptr = NULL;
 109                 ileft = 0;
 110                 continue;
 111         } if (errno == E2BIG) {
 112                 /*
 113                  * Lack of space in output buffer.
 114                  * Hence double the size and retry.
 115                  * But before calling  iconv(), oleft
 116                  * and tptr have to re-adjusted, so that
 117                  * iconv() doesn't overwrite the data
 118                  * which has already been converted.
 119                  */
 120                 oleft += size;
 121                 size *= 2;
 122                 if ((tmp = (char *) realloc(dest, size)) == NULL)
 123                         break;
 124                 tptr = tmp + (tptr - dest);
 125                 dest = tmp;
 126                 continue;
 127         } else {
 128                 /* Other errors */
 129                 break;
 130         }
 131     }
 132
 133     if (dest != NULL) {
 134         if (ret == -1) {
 135                 /* Free malloc'ed memory on failure */
 136                 free(dest);
 137                 dest = NULL;
 138         } else if (oleft > 0) {
 139                 /* NULL terminate the return value */
 140                 *(dest + (size - oleft)) = '\0';
 141         } else {
 142                 /* realloc one more byte and NULL terminate */
 143                 if ((tmp = (char *) realloc(dest, size + 1)) == NULL) {
 144                         free(dest);
 145                         dest = NULL;
 146                 } else {
 147                         *(dest + size) = '\0';
 148                 }
 149         }
 150     }
 151
 152     (void) iconv_close(cd);
 153     if (dest == NULL) {
 154         /* last chance in case some other failure along the way occurs */
 155         return (strdup(src));
 156     }
 157     return (dest);
 158 }
 159
 160 char *
 161 ldaptool_UTF82local( const char *src )
 162 {
 163     char *to_code;
 164     if ((to_code = nl_langinfo(CODESET)) == NULL)
 165         return (strdup(src));
 166     return (ldaptool_convert(src, "UTF-8", (const char *)to_code));
 167 }
 168 #endif  /* SOLARIS_LDAP_CMD */
 169
 170 char *
 171 ldaptool_local2UTF8( const char *src )
 172 {
 173 #ifdef SOLARIS_LDAP_CMD
 174     char *from_code;
 175     if ((from_code = nl_langinfo(CODESET)) == NULL)
 176         return (strdup(src));
 177     return (ldaptool_convert(src, (const char *)from_code, "UTF-8"));
 178 #else
 179     char *utf8;
 180     charsetset = 0;
 181     if (src == NULL)
 182     {
 183         return NULL;
 184     }
 185     utf8 = strdup(src);
 186     return ( utf8 );
 187 #endif  /* SOLARIS_LDAP_CMD */
 188 }
 189
 190 #else /* HAVE_LIBICU */
 191
 192 #include "unicode/utypes.h"
 193 #include "unicode/ucnv.h"
 194
 195 #define NSPR20
 196
 197 #ifdef XP_WIN32
 198 #define  VC_EXTRALEAN
 199 #include <afxwin.h>
 200 #include <winnls.h>
 201 #endif
 202
 203 extern char *ldaptool_charset;
 204 static int charsetset = 0;
 205
 206 extern "C" {
 207 char *ldaptool_convdir = NULL;
 208 char *ldaptool_local2UTF8( const char * );
 209 }
 210
 211 #ifndef XP_WIN32
 212 char * GetNormalizedLocaleName(void);
 213
 214
 215 char *
 216 GetNormalizedLocaleName(void)
 217 {
 218 #ifdef _HPUX_SOURCE
 219
 220     int    len;
 221     char    *locale;
 222
 223     locale = setlocale(LC_CTYPE, "");
 224     if (locale && *locale) {
 225         len = strlen(locale);
 226     } else {
 227         locale = "C";
 228         len = 1;
 229     }
 230
 231     if ((!strncmp(locale, "/\x03:", 3)) &&
 232         (!strcmp(&locale[len - 2], ";/"))) {
 233         locale += 3;
 234         len -= 5;
 235     }
 236
 237     locale = strdup(locale);
 238     if (locale) {
 239         locale[len] = 0;
 240     }
 241
 242     return locale;
 243
 244 #else
 245
 246     char    *locale;
 247
 248     locale = setlocale(LC_CTYPE, "");
 249     if (locale && *locale) {
 250         return strdup(locale);
 251     }
 252
 253     return strdup("C");
 254
 255 #endif
 256 }
 257
 258 #if defined(IRIX)
/*
 * Built-in locale-name to charset table, selected when IRIX is defined.
 * Each entry has the form "locale: charset"; entries beginning with '!'
 * are comment lines, which GetCharsetFromLocale() skips.  The array is
 * terminated by a NULL pointer.
 */
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C:             ISO_8859-1:1987",
"cs:            ISO_8859-2:1987",
"da:            ISO_8859-1:1987",
"de:            ISO_8859-1:1987",
"de_AT:         ISO_8859-1:1987",
"de_CH:         ISO_8859-1:1987",
"en:            ISO_8859-1:1987",
"en_AU:         ISO_8859-1:1987",
"en_CA:         ISO_8859-1:1987",
"en_TH:         ISO_8859-1:1987",
"en_US:         ISO_8859-1:1987",
"es:            ISO_8859-1:1987",
"fi:            ISO_8859-1:1987",
"fr:            ISO_8859-1:1987",
"fr_BE:         ISO_8859-1:1987",
"fr_CA:         ISO_8859-1:1987",
"fr_CH:         ISO_8859-1:1987",
"is:            ISO_8859-1:1987",
"it:            ISO_8859-1:1987",
"it_CH:         ISO_8859-1:1987",
"ja_JP.EUC:     Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko_KR.euc:     EUC-KR",
"nl:            ISO_8859-1:1987",
"nl_BE:         ISO_8859-1:1987",
"no:            ISO_8859-1:1987",
"pl:            ISO_8859-2:1987",
"pt:            ISO_8859-1:1987",
"sh:            ISO_8859-2:1987",
"sk:            ISO_8859-2:1987",
"sv:            ISO_8859-1:1987",
"zh_CN.ugb:     GB2312",
"zh_TW.ucns:    cns11643_1",
NULL
};
 297 #elif defined(SOLARIS)
/*
 * Built-in locale-name to charset table, selected when SOLARIS is defined.
 * Each entry has the form "locale: charset"; entries beginning with '!'
 * are comment lines, which GetCharsetFromLocale() skips.  The array is
 * terminated by a NULL pointer.
 */
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C:             ISO_8859-1:1987",
"ja:            Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP.EUC:     Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP.PCK:     Shift_JIS",
"en:            ISO_8859-1:1987",
"en_AU:         ISO_8859-1:1987",
"en_CA:         ISO_8859-1:1987",
"en_UK:         ISO_8859-1:1987",
"en_US:         ISO_8859-1:1987",
"es:            ISO_8859-1:1987",
"es_AR:         ISO_8859-1:1987",
"es_BO:         ISO_8859-1:1987",
"es_CL:         ISO_8859-1:1987",
"es_CO:         ISO_8859-1:1987",
"es_CR:         ISO_8859-1:1987",
"es_EC:         ISO_8859-1:1987",
"es_GT:         ISO_8859-1:1987",
"es_MX:         ISO_8859-1:1987",
"es_NI:         ISO_8859-1:1987",
"es_PA:         ISO_8859-1:1987",
"es_PE:         ISO_8859-1:1987",
"es_PY:         ISO_8859-1:1987",
"es_SV:         ISO_8859-1:1987",
"es_UY:         ISO_8859-1:1987",
"es_VE:         ISO_8859-1:1987",
"fr:            ISO_8859-1:1987",
"fr_BE:         ISO_8859-1:1987",
"fr_CA:         ISO_8859-1:1987",
"fr_CH:         ISO_8859-1:1987",
"de:            ISO_8859-1:1987",
"de_AT:         ISO_8859-1:1987",
"de_CH:         ISO_8859-1:1987",
"nl:            ISO_8859-1:1987",
"nl_BE:         ISO_8859-1:1987",
"it:            ISO_8859-1:1987",
"sv:            ISO_8859-1:1987",
"no:            ISO_8859-1:1987",
"da:            ISO_8859-1:1987",
"iso_8859_1:    ISO_8859-1:1987",
"japanese:      Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko:            EUC-KR",
"zh:            GB2312",
"zh_TW:         cns11643_1",
NULL
};
 347 #elif defined(OSF1)
/*
 * Built-in locale-name to charset table, selected when OSF1 is defined.
 * Each entry has the form "locale: charset"; entries beginning with '!'
 * are comment lines, which GetCharsetFromLocale() skips.  The array is
 * terminated by a NULL pointer.
 */
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C:                     ISO_8859-1:1987",
"cs_CZ.ISO8859-2:       ISO_8859-2:1987",
"cs_CZ:                 ISO_8859-2:1987",
"da_DK.ISO8859-1:       ISO_8859-1:1987",
"de_CH.ISO8859-1:       ISO_8859-1:1987",
"de_DE.ISO8859-1:       ISO_8859-1:1987",
"en_GB.ISO8859-1:       ISO_8859-1:1987",
"en_US.ISO8859-1:       ISO_8859-1:1987",
"es_ES.ISO8859-1:       ISO_8859-1:1987",
"fi_FI.ISO8859-1:       ISO_8859-1:1987",
"fr_BE.ISO8859-1:       ISO_8859-1:1987",
"fr_CA.ISO8859-1:       ISO_8859-1:1987",
"fr_CH.ISO8859-1:       ISO_8859-1:1987",
"fr_FR.ISO8859-1:       ISO_8859-1:1987",
"hu_HU.ISO8859-2:       ISO_8859-2:1987",
"hu_HU:                 ISO_8859-2:1987",
"is_IS.ISO8859-1:       ISO_8859-1:1987",
"it_IT.ISO8859-1:       ISO_8859-1:1987",
"ja_JP.SJIS:            Shift_JIS",
"ja_JP.eucJP:           Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP:                 Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko_KR.eucKR:           EUC-KR",
"ko_KR:                 EUC-KR",
"nl_BE.ISO8859-1:       ISO_8859-1:1987",
"nl_NL.ISO8859-1:       ISO_8859-1:1987",
"no_NO.ISO8859-1:       ISO_8859-1:1987",
"pl_PL.ISO8859-2:       ISO_8859-2:1987",
"pl_PL:                 ISO_8859-2:1987",
"pt_PT.ISO8859-1:       ISO_8859-1:1987",
"sk_SK.ISO8859-2:       ISO_8859-2:1987",
"sk_SK:                 ISO_8859-2:1987",
"sv_SE.ISO8859-1:       ISO_8859-1:1987",
"zh_CN:                 GB2312",
"zh_HK.big5:            Big5",
"zh_HK.eucTW:           cns11643_1",
"zh_TW.big5:            Big5",
"zh_TW.big5@chuyin:     Big5",
"zh_TW.big5@radical:    Big5",
"zh_TW.big5@stroke:     Big5",
"zh_TW.eucTW:           cns11643_1",
"zh_TW.eucTW@chuyin:    cns11643_1",
"zh_TW.eucTW@radical:   cns11643_1",
"zh_TW.eucTW@stroke:    cns11643_1",
"zh_TW:                 cns11643_1",
NULL
};
 398 #elif defined(HPUX)
/*
 * Built-in locale-name to charset table, selected when HPUX is defined.
 * Each entry has the form "locale: charset"; entries beginning with '!'
 * are comment lines, which GetCharsetFromLocale() skips.  The amount of
 * blank space after the ':' varies between entries; GetCharsetFromLocale()
 * skips spaces and tabs there.  The array is terminated by a NULL pointer.
 */
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C:                     ISO_8859-1:1987",
"ja_JP:                 Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP.SJIS:            Shift_JIS",
"ja_JP.eucJP:           Extended_UNIX_Code_Packed_Format_for_Japanese",
"es_ES:                 ISO_8859-1:1987",
"es_ES.iso88591:        ISO_8859-1:1987",
"sv_SE:                 ISO_8859-1:1987",
"sv_SE.iso88591:        ISO_8859-1:1987",
"da_DK:                 ISO_8859-1:1987",
"da_DK.iso88591:        ISO_8859-1:1987",
"nl_NL:                 ISO_8859-1:1987",
"nl_NL.iso88591:        ISO_8859-1:1987",
"en:                    ISO_8859-1:1987",
"en_GB:                 ISO_8859-1:1987",
"en_GB.iso88591:        ISO_8859-1:1987",
"en_US:                 ISO_8859-1:1987",
"en_US.iso88591:        ISO_8859-1:1987",
"fi_FI:                 ISO_8859-1:1987",
"fi_FI.iso88591:        ISO_8859-1:1987",
"fr_CA:                 ISO_8859-1:1987",
"fr_CA.iso88591:        ISO_8859-1:1987",
"fr_FR:                 ISO_8859-1:1987",
"fr_FR.iso88591:        ISO_8859-1:1987",
"de_DE:                 ISO_8859-1:1987",
"de_DE.iso88591:        ISO_8859-1:1987",
"is_IS:                 ISO_8859-1:1987",
"is_IS.iso88591:        ISO_8859-1:1987",
"it_IT:                 ISO_8859-1:1987",
"it_IT.iso88591:        ISO_8859-1:1987",
"no_NO:                 ISO_8859-1:1987",
"no_NO.iso88591:        ISO_8859-1:1987",
"pt_PT:                 ISO_8859-1:1987",
"pt_PT.iso88591:        ISO_8859-1:1987",
"hu_HU:                 ISO_8859-2:1987",
"hu_HU.iso88592:        ISO_8859-2:1987",
"cs_CZ:                 ISO_8859-2:1987",
"cs_CZ.iso88592:        ISO_8859-2:1987",
"pl_PL:                 ISO_8859-2:1987",
"pl_PL.iso88592:        ISO_8859-2:1987",
"ro_RO:                 ISO_8859-2:1987",
"ro_RO.iso88592:        ISO_8859-2:1987",
"hr_HR:                 ISO_8859-2:1987",
"hr_HR.iso88592:        ISO_8859-2:1987",
"sk_SK:                 ISO_8859-2:1987",
"sk_SK.iso88592:        ISO_8859-2:1987",
"sl_SI:                 ISO_8859-2:1987",
"sl_SI.iso88592:        ISO_8859-2:1987",
"american.iso88591:     ISO_8859-1:1987",
"bulgarian:             ISO_8859-2:1987",
"c-french.iso88591:     ISO_8859-1:1987",
"chinese-s:             GB2312",
"chinese-t.big5:                Big5",
"czech:                 ISO_8859-2:1987",
"danish.iso88591:       ISO_8859-1:1987",
"dutch.iso88591:                ISO_8859-1:1987",
"english.iso88591:      ISO_8859-1:1987",
"finnish.iso88591:      ISO_8859-1:1987",
"french.iso88591:       ISO_8859-1:1987",
"german.iso88591:       ISO_8859-1:1987",
"hungarian:             ISO_8859-2:1987",
"icelandic.iso88591:    ISO_8859-1:1987",
"italian.iso88591:      ISO_8859-1:1987",
"japanese.euc:          Extended_UNIX_Code_Packed_Format_for_Japanese",
"japanese:              Shift_JIS",
"katakana:              Shift_JIS",
"korean:                        EUC-KR",
"norwegian.iso88591:    ISO_8859-1:1987",
"polish:                        ISO_8859-2:1987",
"portuguese.iso88591:   ISO_8859-1:1987",
"rumanian:              ISO_8859-2:1987",
"serbocroatian:         ISO_8859-2:1987",
"slovene:               ISO_8859-2:1987",
"spanish.iso88591:      ISO_8859-1:1987",
"swedish.iso88591:      ISO_8859-1:1987",
NULL
};
 479 #elif defined(AIX)
/*
 * Built-in locale-name to charset table, selected when AIX is defined.
 * Each entry has the form "locale: charset"; entries beginning with '!'
 * are comment lines, which GetCharsetFromLocale() skips.  Locale names are
 * compared with strcmp(), so "En_JP" and "en_JP" are distinct entries.
 * The array is terminated by a NULL pointer.
 */
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C:                     ISO_8859-1:1987",
"En_JP.IBM-932:         Shift_JIS",
"En_JP:                 Shift_JIS",
"Ja_JP.IBM-932:         Shift_JIS",
"Ja_JP:                 Shift_JIS",
"da_DK.ISO8859-1:       ISO_8859-1:1987",
"da_DK:                 ISO_8859-1:1987",
"de_CH.ISO8859-1:       ISO_8859-1:1987",
"de_CH:                 ISO_8859-1:1987",
"de_DE.ISO8859-1:       ISO_8859-1:1987",
"de_DE:                 ISO_8859-1:1987",
"en_GB.ISO8859-1:       ISO_8859-1:1987",
"en_GB:                 ISO_8859-1:1987",
"en_JP.IBM-eucJP:       Extended_UNIX_Code_Packed_Format_for_Japanese",
"en_JP:                 Extended_UNIX_Code_Packed_Format_for_Japanese",
"en_KR.IBM-eucKR:       EUC-KR",
"en_KR:                 EUC-KR",
"en_TW.IBM-eucTW:       cns11643_1",
"en_TW:                 cns11643_1",
"en_US.ISO8859-1:       ISO_8859-1:1987",
"en_US:                 ISO_8859-1:1987",
"es_ES.ISO8859-1:       ISO_8859-1:1987",
"es_ES:                 ISO_8859-1:1987",
"fi_FI.ISO8859-1:       ISO_8859-1:1987",
"fi_FI:                 ISO_8859-1:1987",
"fr_BE.ISO8859-1:       ISO_8859-1:1987",
"fr_BE:                 ISO_8859-1:1987",
"fr_CA.ISO8859-1:       ISO_8859-1:1987",
"fr_CA:                 ISO_8859-1:1987",
"fr_CH.ISO8859-1:       ISO_8859-1:1987",
"fr_CH:                 ISO_8859-1:1987",
"fr_FR.ISO8859-1:       ISO_8859-1:1987",
"fr_FR:                 ISO_8859-1:1987",
"is_IS.ISO8859-1:       ISO_8859-1:1987",
"is_IS:                 ISO_8859-1:1987",
"it_IT.ISO8859-1:       ISO_8859-1:1987",
"it_IT:                 ISO_8859-1:1987",
"ja_JP.IBM-eucJP:       Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP:                 Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko_KR.IBM-eucKR:       EUC-KR",
"ko_KR:                 EUC-KR",
"nl_BE.ISO8859-1:       ISO_8859-1:1987",
"nl_BE:                 ISO_8859-1:1987",
"nl_NL.ISO8859-1:       ISO_8859-1:1987",
"nl_NL:                 ISO_8859-1:1987",
"no_NO.ISO8859-1:       ISO_8859-1:1987",
"no_NO:                 ISO_8859-1:1987",
"pt_PT.ISO8859-1:       ISO_8859-1:1987",
"pt_PT:                 ISO_8859-1:1987",
"sv_SE.ISO8859-1:       ISO_8859-1:1987",
"sv_SE:                 ISO_8859-1:1987",
"zh_TW.IBM-eucTW:       cns11643_1",
"zh_TW:                 cns11643_1",
NULL
};
 539 #else   // sunos by default
/*
 * Built-in locale-name to charset table used when none of IRIX, SOLARIS,
 * OSF1, HPUX or AIX is defined.  Each entry has the form
 * "locale: charset"; entries beginning with '!' are comment lines, which
 * GetCharsetFromLocale() skips.  The array is terminated by a NULL pointer.
 */
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C:             ISO_8859-1:1987",
"de:            ISO_8859-1:1987",
"en_US:         ISO_8859-1:1987",
"es:            ISO_8859-1:1987",
"fr:            ISO_8859-1:1987",
"iso_8859_1:    ISO_8859-1:1987",
"it:            ISO_8859-1:1987",
"ja:            Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP.EUC:     Extended_UNIX_Code_Packed_Format_for_Japanese",
"japanese:      Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko:            EUC-KR",
"sv:            ISO_8859-1:1987",
"zh:            GB2312",
"zh_TW:         cns11643_1",
NULL
};
 560 #endif
 561
 562 #define BSZ     256
 563
 564 char *
 565 GetCharsetFromLocale(char *locale)
 566 {
 567     char *tmpcharset = NULL;
 568     char buf[BSZ];
 569     char *p;
 570     const char *line;
 571     int i=0;
 572
 573     line = CHARCONVTABLE[i];
 574     while (line != NULL)
 575     {
 576        if (*line == 0)
 577        {
 578           break;
 579        }
 580
 581        strcpy(buf, line);
 582        line = CHARCONVTABLE[++i];
 583
 584        if (strlen(buf) == 0 || buf[0] == '!')
 585        {
 586           continue;
 587        }
 588        p = strchr(buf, ':');
 589        if (p == NULL)
 590        {
 591           tmpcharset = NULL;
 592           break;
 593        }
 594        *p = 0;
 595        if (strcmp(buf, locale) == 0) {
 596           while (*++p == ' ' || *p == '\t')
 597              ;
 598           if (isalpha(*p)) {
 599              tmpcharset = strdup(p);
 600           } else
 601              tmpcharset = NULL;
 602
 603           break;
 604        }
 605     }
 606     return tmpcharset;
 607 }
 608
 609 #endif /* Not defined XP_WIN32 */
 610
 611 #ifdef XP_WIN32
 612 char *_convertor(const char *instr, int bFromUTF8)
 613 {
 614     char  *outstr = NULL;
 615     int    inlen, wclen, outlen;
 616     LPWSTR wcstr;
 617
 618     if (instr == NULL)
 619             return NULL;
 620
 621     if ((inlen = strlen(instr)) <= 0)
 622             return NULL;
 623
 624     /* output never becomes longer than input,
 625      * thus we don't have to ask for the length
 626      */
 627     wcstr = (LPWSTR) malloc( sizeof( WCHAR ) * (inlen+1) );
 628     if (!wcstr)
 629         return NULL;
 630
 631     wclen = MultiByteToWideChar(bFromUTF8 ? CP_UTF8 : CP_ACP, 0, instr,
 632                                  inlen, wcstr, inlen);
 633     outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr,
 634                                   wclen, NULL, 0, NULL, NULL);
 635
 636     if (outlen > 0) {
 637         outstr = (char *) malloc(outlen + 2);
 638         outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr,
 639                                       wclen, outstr, outlen, NULL, NULL);
 640         if (outlen > 0)
 641             *(outstr+outlen) = _T('\0');
 642         else
 643             return NULL;
 644     }
 645     free( wcstr );
 646     return outstr;
 647 }
 648 #endif
 649
 650 char *
 651 ldaptool_local2UTF8( const char *src )
 652 {
 653     char *utf8;
 654 #ifndef XP_WIN32
 655     char *locale, *newcharset;
 656     size_t outLen, resultLen;
 657     UErrorCode err = U_ZERO_ERROR;
 658     UConverter *cnv;
 659
 660     if (src == NULL)
 661     {
 662       return NULL;
 663     }
 664     else if (*src == 0 || (ldaptool_charset == NULL)
 665              || (!strcmp( ldaptool_charset, "" )))
 666     {
 667         /* no option specified, so assume it's already in utf-8 */
 668         utf8 = strdup(src);
 669         return utf8;
 670     }
 671
 672     if( !strcmp( ldaptool_charset, "0" )
 673             && (!charsetset) )
 674     {
 675         /* zero option specified, so try to get default codepage
 676            this sucker is strdup'd immediately so it's OK to cast */
 677         newcharset = (char *)ucnv_getDefaultName();
 678         if (newcharset != NULL) {
 679             free( ldaptool_charset );
 680             /* the default codepage lives in ICU */
 681             ldaptool_charset = strdup(newcharset);
 682             if (ldaptool_charset == NULL) {
 683                 return strdup(src);
 684             }
 685         }
 686         charsetset = 1;
 687     }
 688     else
 689     if( strcmp( ldaptool_charset, "" ) && (!charsetset) )
 690     {
 691         /* -i option specified with charset name */
 692         charsetset = 1;
 693     }
 694
 695     /* do the preflight - get the size needed for the target buffer */
 696     outLen = (size_t) ucnv_convert( "utf-8", ldaptool_charset, NULL, 0, src,
 697                                       strlen( src ) * sizeof(char), &err);
 698
 699     if ((err != U_BUFFER_OVERFLOW_ERROR) || (outLen == 0)) {
 700       /* default to just a copy of the string - this covers
 701          the case of an illegal charset also */
 702       return strdup(src);
 703     }
 704
 705     utf8 =  (char *) malloc( outLen + 1);
 706     if( utf8 == NULL ) {
 707       /* if we're already out of memory, does strdup just return NULL? */
 708        return strdup(src);
 709     }
 710
 711     /* do the actual conversion this time */
 712     err = U_ZERO_ERROR;
 713     resultLen = ucnv_convert( "utf-8", ldaptool_charset, utf8, (outLen + 1), src,
 714                        strlen(src) * sizeof(char), &err );
 715
 716     if (!U_SUCCESS(err)) {
 717       free(utf8);
 718       return strdup(src);
 719     }
 720
 721 #else
 722     utf8 = _convertor(src, FALSE);
 723     if( utf8 == NULL )
 724         utf8 = strdup(src);
 725 #endif
 726
 727     return utf8;
 728 }
 729 #endif /* HAVE_LIBICU */
 730
 731 #ifndef HAVE_LIBICU
 732 #ifdef __cplusplus
 733 }
 734 #endif
 735 #endif