2 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
3 * Use is subject to license terms.
7 * The contents of this file are subject to the Netscape Public
8 * License Version 1.1 (the "License"); you may not use this file
9 * except in compliance with the License. You may obtain a copy of
10 * the License at http://www.mozilla.org/NPL/
12 * Software distributed under the License is distributed on an "AS
13 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
14 * implied. See the License for the specific language governing
15 * rights and limitations under the License.
17 * The Original Code is Mozilla Communicator client code, released
20 * The Initial Developer of the Original Code is Netscape
21 * Communications Corporation. Portions created by Netscape are
22 * Copyright (C) 1998-1999 Netscape Communications Corporation. All
36 #ifdef SOLARIS_LDAP_CMD
46 extern char *ldaptool_charset
;
47 char *ldaptool_convdir
= NULL
;
48 static int charsetset
= 0;
49 char *ldaptool_local2UTF8( const char *src
);
51 #ifdef SOLARIS_LDAP_CMD
52 static char *ldaptool_convert( const char *src
, const char *fcode
,
54 char *ldaptool_UTF82local( const char *src
);
55 #endif /* SOLARIS_LDAP_CMD */
57 #ifdef SOLARIS_LDAP_CMD
59 * ICU version always returns string, unless strdup fails.
60 * As in the ICU version, in case of error return strdup(src).
61 * Usually strdup(src) will be ASCII and legal anyway.
65 ldaptool_convert( const char *src
, const char *fcode
,
67 char *dest
, *tptr
, *tmp
;
70 size_t ileft
, oleft
, ret
, size
;
75 if (fcode
== NULL
|| tcode
== NULL
)
78 if (strcasecmp(fcode
, tcode
) == 0)
81 if ((cd
= iconv_open(tcode
, fcode
)) == (iconv_t
)-1) {
82 /* conversion table not available */
90 if ((dest
= (char *)malloc(size
)) == NULL
) {
91 (void) iconv_close(cd
);
92 /* maybe strlen(src) bytes of memory are still available */
99 ret
= iconv(cd
, &fptr
, &ileft
, &tptr
, &oleft
);
101 if (ret
!= (size_t)-1) {
103 * Success. Place 'cd' into its initial shift
104 * state before returning.
106 if (fptr
== NULL
) /* already in initial state */
111 } if (errno
== E2BIG
) {
113 * Lack of space in output buffer.
114 * Hence double the size and retry.
115 * But before calling iconv(), oleft
116 * and tptr have to be re-adjusted, so that
117 * iconv() doesn't overwrite the data
118 * which has already been converted.
122 if ((tmp
= (char *) realloc(dest
, size
)) == NULL
)
124 tptr
= tmp
+ (tptr
- dest
);
135 /* Free malloc'ed memory on failure */
138 } else if (oleft
> 0) {
139 /* NULL terminate the return value */
140 *(dest
+ (size
- oleft
)) = '\0';
142 /* realloc one more byte and NULL terminate */
143 if ((tmp
= (char *) realloc(dest
, size
+ 1)) == NULL
) {
147 *(dest
+ size
) = '\0';
152 (void) iconv_close(cd
);
154 /* last chance in case some other failure along the way occurs */
155 return (strdup(src
));
161 ldaptool_UTF82local( const char *src
)
164 if ((to_code
= nl_langinfo(CODESET
)) == NULL
)
165 return (strdup(src
));
166 return (ldaptool_convert(src
, "UTF-8", (const char *)to_code
));
168 #endif /* SOLARIS_LDAP_CMD */
171 ldaptool_local2UTF8( const char *src
)
173 #ifdef SOLARIS_LDAP_CMD
175 if ((from_code
= nl_langinfo(CODESET
)) == NULL
)
176 return (strdup(src
));
177 return (ldaptool_convert(src
, (const char *)from_code
, "UTF-8"));
187 #endif /* SOLARIS_LDAP_CMD */
190 #else /* HAVE_LIBICU */
192 #include "unicode/utypes.h"
193 #include "unicode/ucnv.h"
203 extern char *ldaptool_charset
;
204 static int charsetset
= 0;
207 char *ldaptool_convdir
= NULL
;
208 char *ldaptool_local2UTF8( const char * );
212 char * GetNormalizedLocaleName(void);
216 GetNormalizedLocaleName(void)
223 locale
= setlocale(LC_CTYPE
, "");
224 if (locale
&& *locale
) {
225 len
= strlen(locale
);
231 if ((!strncmp(locale
, "/\x03:", 3)) &&
232 (!strcmp(&locale
[len
- 2], ";/"))) {
237 locale
= strdup(locale
);
248 locale
= setlocale(LC_CTYPE
, "");
249 if (locale
&& *locale
) {
250 return strdup(locale
);
259 const char *CHARCONVTABLE
[] =
261 "! This table maps the host's locale names to IANA charsets",
263 "C: ISO_8859-1:1987",
264 "cs: ISO_8859-2:1987",
265 "da: ISO_8859-1:1987",
266 "de: ISO_8859-1:1987",
267 "de_AT: ISO_8859-1:1987",
268 "de_CH: ISO_8859-1:1987",
269 "en: ISO_8859-1:1987",
270 "en_AU: ISO_8859-1:1987",
271 "en_CA: ISO_8859-1:1987",
272 "en_TH: ISO_8859-1:1987",
273 "en_US: ISO_8859-1:1987",
274 "es: ISO_8859-1:1987",
275 "fi: ISO_8859-1:1987",
276 "fr: ISO_8859-1:1987",
277 "fr_BE: ISO_8859-1:1987",
278 "fr_CA: ISO_8859-1:1987",
279 "fr_CH: ISO_8859-1:1987",
280 "is: ISO_8859-1:1987",
281 "it: ISO_8859-1:1987",
282 "it_CH: ISO_8859-1:1987",
283 "ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese",
285 "nl: ISO_8859-1:1987",
286 "nl_BE: ISO_8859-1:1987",
287 "no: ISO_8859-1:1987",
288 "pl: ISO_8859-2:1987",
289 "pt: ISO_8859-1:1987",
290 "sh: ISO_8859-2:1987",
291 "sk: ISO_8859-2:1987",
292 "sv: ISO_8859-1:1987",
294 "zh_TW.ucns: cns11643_1",
297 #elif defined(SOLARIS)
298 const char *CHARCONVTABLE
[] =
300 "! This table maps the host's locale names to IANA charsets",
302 "C: ISO_8859-1:1987",
303 "ja: Extended_UNIX_Code_Packed_Format_for_Japanese",
304 "ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese",
305 "ja_JP.PCK: Shift_JIS",
306 "en: ISO_8859-1:1987",
307 "en_AU: ISO_8859-1:1987",
308 "en_CA: ISO_8859-1:1987",
309 "en_UK: ISO_8859-1:1987",
310 "en_US: ISO_8859-1:1987",
311 "es: ISO_8859-1:1987",
312 "es_AR: ISO_8859-1:1987",
313 "es_BO: ISO_8859-1:1987",
314 "es_CL: ISO_8859-1:1987",
315 "es_CO: ISO_8859-1:1987",
316 "es_CR: ISO_8859-1:1987",
317 "es_EC: ISO_8859-1:1987",
318 "es_GT: ISO_8859-1:1987",
319 "es_MX: ISO_8859-1:1987",
320 "es_NI: ISO_8859-1:1987",
321 "es_PA: ISO_8859-1:1987",
322 "es_PE: ISO_8859-1:1987",
323 "es_PY: ISO_8859-1:1987",
324 "es_SV: ISO_8859-1:1987",
325 "es_UY: ISO_8859-1:1987",
326 "es_VE: ISO_8859-1:1987",
327 "fr: ISO_8859-1:1987",
328 "fr_BE: ISO_8859-1:1987",
329 "fr_CA: ISO_8859-1:1987",
330 "fr_CH: ISO_8859-1:1987",
331 "de: ISO_8859-1:1987",
332 "de_AT: ISO_8859-1:1987",
333 "de_CH: ISO_8859-1:1987",
334 "nl: ISO_8859-1:1987",
335 "nl_BE: ISO_8859-1:1987",
336 "it: ISO_8859-1:1987",
337 "sv: ISO_8859-1:1987",
338 "no: ISO_8859-1:1987",
339 "da: ISO_8859-1:1987",
340 "iso_8859_1: ISO_8859-1:1987",
341 "japanese: Extended_UNIX_Code_Packed_Format_for_Japanese",
348 const char *CHARCONVTABLE
[] =
350 "! This table maps the host's locale names to IANA charsets",
352 "C: ISO_8859-1:1987",
353 "cs_CZ.ISO8859-2: ISO_8859-2:1987",
354 "cs_CZ: ISO_8859-2:1987",
355 "da_DK.ISO8859-1: ISO_8859-1:1987",
356 "de_CH.ISO8859-1: ISO_8859-1:1987",
357 "de_DE.ISO8859-1: ISO_8859-1:1987",
358 "en_GB.ISO8859-1: ISO_8859-1:1987",
359 "en_US.ISO8859-1: ISO_8859-1:1987",
360 "es_ES.ISO8859-1: ISO_8859-1:1987",
361 "fi_FI.ISO8859-1: ISO_8859-1:1987",
362 "fr_BE.ISO8859-1: ISO_8859-1:1987",
363 "fr_CA.ISO8859-1: ISO_8859-1:1987",
364 "fr_CH.ISO8859-1: ISO_8859-1:1987",
365 "fr_FR.ISO8859-1: ISO_8859-1:1987",
366 "hu_HU.ISO8859-2: ISO_8859-2:1987",
367 "hu_HU: ISO_8859-2:1987",
368 "is_IS.ISO8859-1: ISO_8859-1:1987",
369 "it_IT.ISO8859-1: ISO_8859-1:1987",
370 "ja_JP.SJIS: Shift_JIS",
371 "ja_JP.eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese",
372 "ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese",
373 "ko_KR.eucKR: EUC-KR",
375 "nl_BE.ISO8859-1: ISO_8859-1:1987",
376 "nl_NL.ISO8859-1: ISO_8859-1:1987",
377 "no_NO.ISO8859-1: ISO_8859-1:1987",
378 "pl_PL.ISO8859-2: ISO_8859-2:1987",
379 "pl_PL: ISO_8859-2:1987",
380 "pt_PT.ISO8859-1: ISO_8859-1:1987",
381 "sk_SK.ISO8859-2: ISO_8859-2:1987",
382 "sk_SK: ISO_8859-2:1987",
383 "sv_SE.ISO8859-1: ISO_8859-1:1987",
386 "zh_HK.eucTW: cns11643_1",
388 "zh_TW.big5@chuyin: Big5",
389 "zh_TW.big5@radical: Big5",
390 "zh_TW.big5@stroke: Big5",
391 "zh_TW.eucTW: cns11643_1",
392 "zh_TW.eucTW@chuyin: cns11643_1",
393 "zh_TW.eucTW@radical: cns11643_1",
394 "zh_TW.eucTW@stroke: cns11643_1",
399 const char *CHARCONVTABLE
[] =
401 "! This table maps the host's locale names to IANA charsets",
403 "C: ISO_8859-1:1987",
404 "ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese",
405 "ja_JP.SJIS: Shift_JIS",
406 "ja_JP.eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese",
407 "es_ES: ISO_8859-1:1987",
408 "es_ES.iso88591: ISO_8859-1:1987",
409 "sv_SE: ISO_8859-1:1987",
410 "sv_SE.iso88591: ISO_8859-1:1987",
411 "da_DK: ISO_8859-1:1987",
412 "da_DK.iso88591: ISO_8859-1:1987",
413 "nl_NL: ISO_8859-1:1987",
414 "nl_NL.iso88591: ISO_8859-1:1987",
415 "en: ISO_8859-1:1987",
416 "en_GB: ISO_8859-1:1987",
417 "en_GB.iso88591: ISO_8859-1:1987",
418 "en_US: ISO_8859-1:1987",
419 "en_US.iso88591: ISO_8859-1:1987",
420 "fi_FI: ISO_8859-1:1987",
421 "fi_FI.iso88591: ISO_8859-1:1987",
422 "fr_CA: ISO_8859-1:1987",
423 "fr_CA.iso88591: ISO_8859-1:1987",
424 "fr_FR: ISO_8859-1:1987",
425 "fr_FR.iso88591: ISO_8859-1:1987",
426 "de_DE: ISO_8859-1:1987",
427 "de_DE.iso88591: ISO_8859-1:1987",
428 "is_IS: ISO_8859-1:1987",
429 "is_IS.iso88591: ISO_8859-1:1987",
430 "it_IT: ISO_8859-1:1987",
431 "it_IT.iso88591: ISO_8859-1:1987",
432 "no_NO: ISO_8859-1:1987",
433 "no_NO.iso88591: ISO_8859-1:1987",
434 "pt_PT: ISO_8859-1:1987",
435 "pt_PT.iso88591: ISO_8859-1:1987",
436 "hu_HU: ISO_8859-2:1987",
437 "hu_HU.iso88592: ISO_8859-2:1987",
438 "cs_CZ: ISO_8859-2:1987",
439 "cs_CZ.iso88592: ISO_8859-2:1987",
440 "pl_PL: ISO_8859-2:1987",
441 "pl_PL.iso88592: ISO_8859-2:1987",
442 "ro_RO: ISO_8859-2:1987",
443 "ro_RO.iso88592: ISO_8859-2:1987",
444 "hr_HR: ISO_8859-2:1987",
445 "hr_HR.iso88592: ISO_8859-2:1987",
446 "sk_SK: ISO_8859-2:1987",
447 "sk_SK.iso88592: ISO_8859-2:1987",
448 "sl_SI: ISO_8859-2:1987",
449 "sl_SI.iso88592: ISO_8859-2:1987",
450 "american.iso88591: ISO_8859-1:1987",
451 "bulgarian: ISO_8859-2:1987",
452 "c-french.iso88591: ISO_8859-1:1987",
454 "chinese-t.big5: Big5",
455 "czech: ISO_8859-2:1987",
456 "danish.iso88591: ISO_8859-1:1987",
457 "dutch.iso88591: ISO_8859-1:1987",
458 "english.iso88591: ISO_8859-1:1987",
459 "finnish.iso88591: ISO_8859-1:1987",
460 "french.iso88591: ISO_8859-1:1987",
461 "german.iso88591: ISO_8859-1:1987",
462 "hungarian: ISO_8859-2:1987",
463 "icelandic.iso88591: ISO_8859-1:1987",
464 "italian.iso88591: ISO_8859-1:1987",
465 "japanese.euc: Extended_UNIX_Code_Packed_Format_for_Japanese",
466 "japanese: Shift_JIS",
467 "katakana: Shift_JIS",
469 "norwegian.iso88591: ISO_8859-1:1987",
470 "polish: ISO_8859-2:1987",
471 "portuguese.iso88591: ISO_8859-1:1987",
472 "rumanian: ISO_8859-2:1987",
473 "serbocroatian: ISO_8859-2:1987",
474 "slovene: ISO_8859-2:1987",
475 "spanish.iso88591: ISO_8859-1:1987",
476 "swedish.iso88591: ISO_8859-1:1987",
480 const char *CHARCONVTABLE
[] =
482 "! This table maps the host's locale names to IANA charsets",
484 "C: ISO_8859-1:1987",
485 "En_JP.IBM-932: Shift_JIS",
487 "Ja_JP.IBM-932: Shift_JIS",
489 "da_DK.ISO8859-1: ISO_8859-1:1987",
490 "da_DK: ISO_8859-1:1987",
491 "de_CH.ISO8859-1: ISO_8859-1:1987",
492 "de_CH: ISO_8859-1:1987",
493 "de_DE.ISO8859-1: ISO_8859-1:1987",
494 "de_DE: ISO_8859-1:1987",
495 "en_GB.ISO8859-1: ISO_8859-1:1987",
496 "en_GB: ISO_8859-1:1987",
497 "en_JP.IBM-eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese",
498 "en_JP: Extended_UNIX_Code_Packed_Format_for_Japanese",
499 "en_KR.IBM-eucKR: EUC-KR",
501 "en_TW.IBM-eucTW: cns11643_1",
503 "en_US.ISO8859-1: ISO_8859-1:1987",
504 "en_US: ISO_8859-1:1987",
505 "es_ES.ISO8859-1: ISO_8859-1:1987",
506 "es_ES: ISO_8859-1:1987",
507 "fi_FI.ISO8859-1: ISO_8859-1:1987",
508 "fi_FI: ISO_8859-1:1987",
509 "fr_BE.ISO8859-1: ISO_8859-1:1987",
510 "fr_BE: ISO_8859-1:1987",
511 "fr_CA.ISO8859-1: ISO_8859-1:1987",
512 "fr_CA: ISO_8859-1:1987",
513 "fr_CH.ISO8859-1: ISO_8859-1:1987",
514 "fr_CH: ISO_8859-1:1987",
515 "fr_FR.ISO8859-1: ISO_8859-1:1987",
516 "fr_FR: ISO_8859-1:1987",
517 "is_IS.ISO8859-1: ISO_8859-1:1987",
518 "is_IS: ISO_8859-1:1987",
519 "it_IT.ISO8859-1: ISO_8859-1:1987",
520 "it_IT: ISO_8859-1:1987",
521 "ja_JP.IBM-eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese",
522 "ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese",
523 "ko_KR.IBM-eucKR: EUC-KR",
525 "nl_BE.ISO8859-1: ISO_8859-1:1987",
526 "nl_BE: ISO_8859-1:1987",
527 "nl_NL.ISO8859-1: ISO_8859-1:1987",
528 "nl_NL: ISO_8859-1:1987",
529 "no_NO.ISO8859-1: ISO_8859-1:1987",
530 "no_NO: ISO_8859-1:1987",
531 "pt_PT.ISO8859-1: ISO_8859-1:1987",
532 "pt_PT: ISO_8859-1:1987",
533 "sv_SE.ISO8859-1: ISO_8859-1:1987",
534 "sv_SE: ISO_8859-1:1987",
535 "zh_TW.IBM-eucTW: cns11643_1",
539 #else // sunos by default
540 const char *CHARCONVTABLE
[] =
542 "! This table maps the host's locale names to IANA charsets",
544 "C: ISO_8859-1:1987",
545 "de: ISO_8859-1:1987",
546 "en_US: ISO_8859-1:1987",
547 "es: ISO_8859-1:1987",
548 "fr: ISO_8859-1:1987",
549 "iso_8859_1: ISO_8859-1:1987",
550 "it: ISO_8859-1:1987",
551 "ja: Extended_UNIX_Code_Packed_Format_for_Japanese",
552 "ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese",
553 "japanese: Extended_UNIX_Code_Packed_Format_for_Japanese",
555 "sv: ISO_8859-1:1987",
565 GetCharsetFromLocale(char *locale
)
567 char *tmpcharset
= NULL
;
573 line
= CHARCONVTABLE
[i
];
582 line
= CHARCONVTABLE
[++i
];
584 if (strlen(buf
) == 0 || buf
[0] == '!')
588 p
= strchr(buf
, ':');
595 if (strcmp(buf
, locale
) == 0) {
596 while (*++p
== ' ' || *p
== '\t')
599 tmpcharset
= strdup(p
);
609 #endif /* Not defined XP_WIN32 */
612 char *_convertor(const char *instr
, int bFromUTF8
)
615 int inlen
, wclen
, outlen
;
621 if ((inlen
= strlen(instr
)) <= 0)
624 /* output never becomes longer than input,
625 * thus we don't have to ask for the length
627 wcstr
= (LPWSTR
) malloc( sizeof( WCHAR
) * (inlen
+1) );
631 wclen
= MultiByteToWideChar(bFromUTF8
? CP_UTF8
: CP_ACP
, 0, instr
,
632 inlen
, wcstr
, inlen
);
633 outlen
= WideCharToMultiByte(bFromUTF8
? CP_ACP
: CP_UTF8
, 0, wcstr
,
634 wclen
, NULL
, 0, NULL
, NULL
);
637 outstr
= (char *) malloc(outlen
+ 2);
638 outlen
= WideCharToMultiByte(bFromUTF8
? CP_ACP
: CP_UTF8
, 0, wcstr
,
639 wclen
, outstr
, outlen
, NULL
, NULL
);
641 *(outstr
+outlen
) = _T('\0');
651 ldaptool_local2UTF8( const char *src
)
655 char *locale
, *newcharset
;
656 size_t outLen
, resultLen
;
657 UErrorCode err
= U_ZERO_ERROR
;
664 else if (*src
== 0 || (ldaptool_charset
== NULL
)
665 || (!strcmp( ldaptool_charset
, "" )))
667 /* no option specified, so assume it's already in utf-8 */
672 if( !strcmp( ldaptool_charset
, "0" )
675 /* zero option specified, so try to get default codepage
676 the returned name is strdup'd immediately so it's OK to cast */
677 newcharset
= (char *)ucnv_getDefaultName();
678 if (newcharset
!= NULL
) {
679 free( ldaptool_charset
);
680 /* the default codepage lives in ICU */
681 ldaptool_charset
= strdup(newcharset
);
682 if (ldaptool_charset
== NULL
) {
689 if( strcmp( ldaptool_charset
, "" ) && (!charsetset
) )
691 /* -i option specified with charset name */
695 /* do the preflight - get the size needed for the target buffer */
696 outLen
= (size_t) ucnv_convert( "utf-8", ldaptool_charset
, NULL
, 0, src
,
697 strlen( src
) * sizeof(char), &err
);
699 if ((err
!= U_BUFFER_OVERFLOW_ERROR
) || (outLen
== 0)) {
700 /* default to just a copy of the string - this covers
701 the case of an illegal charset also */
705 utf8
= (char *) malloc( outLen
+ 1);
707 /* if we're already out of memory, does strdup just return NULL? */
711 /* do the actual conversion this time */
713 resultLen
= ucnv_convert( "utf-8", ldaptool_charset
, utf8
, (outLen
+ 1), src
,
714 strlen(src
) * sizeof(char), &err
);
716 if (!U_SUCCESS(err
)) {
722 utf8
= _convertor(src
, FALSE
);
729 #endif /* HAVE_LIBICU */