Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / cmd / ldap / common / convutf8.c
blob164687a4c389bc69f0597e583cf572da9f65e733
1 /*
2 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
3 * Use is subject to license terms.
4 */
6 /*
7 * The contents of this file are subject to the Netscape Public
8 * License Version 1.1 (the "License"); you may not use this file
9 * except in compliance with the License. You may obtain a copy of
10 * the License at http://www.mozilla.org/NPL/
12 * Software distributed under the License is distributed on an "AS
13 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
14 * implied. See the License for the specific language governing
15 * rights and limitations under the License.
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
20 * The Initial Developer of the Original Code is Netscape
21 * Communications Corporation. Portions created by Netscape are
22 * Copyright (C) 1998-1999 Netscape Communications Corporation. All
23 * Rights Reserved.
25 * Contributor(s):
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <locale.h>
32 #include <ctype.h>
34 #ifndef HAVE_LIBICU
36 #ifdef SOLARIS_LDAP_CMD
37 #include <errno.h>
38 #include <langinfo.h>
39 #include <iconv.h>
40 #endif
42 #ifdef __cplusplus
43 extern "C" {
44 #endif
/* Character set name selected by the caller; defined in another file. */
extern char *ldaptool_charset;
/* Conversion directory; starts out unset and is not referenced in this file. */
char *ldaptool_convdir = NULL;
/*
 * In this (non-ICU) half of the file the flag is only ever reset to 0 by
 * ldaptool_local2UTF8() in the non-Solaris build; nothing reads it here.
 */
static int charsetset = 0;
char *ldaptool_local2UTF8( const char *src );
51 #ifdef SOLARIS_LDAP_CMD
52 static char *ldaptool_convert( const char *src, const char *fcode,
53 const char *tcode);
54 char *ldaptool_UTF82local( const char *src );
55 #endif /* SOLARIS_LDAP_CMD */
57 #ifdef SOLARIS_LDAP_CMD
/*
 * Convert the NUL-terminated string 'src' from codeset 'fcode' to codeset
 * 'tcode' with iconv().
 *
 * Returns NULL when 'src' is NULL.  Otherwise the result is a malloc'ed
 * string that the caller must free(): the converted text on success, or
 * strdup(src) when a codeset name is NULL, both names are equal (ignoring
 * case), no conversion table is available, memory runs out, or iconv()
 * reports an error.
 *
 * ICU version always returns string, unless strdup fails.
 * As in ICU version, in case of error strdup(src).
 * Usually strdup(src) will be ASCII and legal anyways.
 */
static char *
ldaptool_convert(const char *src, const char *fcode, const char *tcode)
{
	char		*dest, *tptr, *tmp;
	const char	*fptr;
	iconv_t		cd;
	size_t		ileft, oleft, ret, size, used;

	if (src == NULL)
		return (NULL);

	if (fcode == NULL || tcode == NULL)
		return (strdup(src));

	if (strcasecmp(fcode, tcode) == 0)
		return (strdup(src));

	if ((cd = iconv_open(tcode, fcode)) == (iconv_t)-1) {
		/* conversion table not available */
		return (strdup(src));
	}

	ileft = strlen(src);
	/*
	 * Start with twice the input length, but never with zero: doubling
	 * an empty buffer on E2BIG would never make any progress.
	 */
	size = (ileft > 0) ? 2 * ileft : 1;
	oleft = size;

	/* One extra byte is always reserved for the terminating NUL. */
	if ((dest = (char *)malloc(size + 1)) == NULL) {
		(void) iconv_close(cd);
		/* maybe sizeof strlen(src) memory still exists */
		return (strdup(src));
	}
	tptr = dest;
	fptr = src;

	for (;;) {
		/*
		 * The second parameter of iconv() is 'const char **' on some
		 * systems and 'char **' on others; passing it as 'void *'
		 * satisfies either prototype.
		 */
		ret = iconv(cd, (void *)&fptr, &ileft, &tptr, &oleft);

		if (ret != (size_t)-1) {
			/*
			 * Success.  Place 'cd' into its initial shift
			 * state before returning.
			 */
			if (fptr == NULL)	/* already in initial state */
				break;
			fptr = NULL;
			ileft = 0;
			continue;
		}

		if (errno != E2BIG)
			break;			/* Other errors */

		/*
		 * Lack of space in output buffer.  Double the size and
		 * retry.  The write offset is saved before realloc() so
		 * that tptr can be re-based onto the new buffer without
		 * touching the old pointer, and iconv() continues right
		 * after the data which has already been converted.
		 */
		used = (size_t)(tptr - dest);
		if ((tmp = (char *)realloc(dest, 2 * size + 1)) == NULL)
			break;			/* ret is still -1 */
		dest = tmp;
		tptr = dest + used;
		oleft += size;
		size *= 2;
	}

	(void) iconv_close(cd);

	if (ret == (size_t)-1) {
		/* Free malloc'ed memory on failure and fall back to a copy */
		free(dest);
		return (strdup(src));
	}

	/* NUL terminate; the reserved byte guarantees there is room. */
	dest[size - oleft] = '\0';
	return (dest);
}
/*
 * Convert the UTF-8 string 'src' to the codeset reported by
 * nl_langinfo(CODESET).  Returns NULL when 'src' is NULL; otherwise a
 * malloc'ed string (converted text, or a plain copy of 'src' when no
 * conversion is possible) that the caller must free().
 */
char *
ldaptool_UTF82local(const char *src)
{
	char	*to_code;

	/* Never hand a NULL pointer to strdup() on the fallback path. */
	if (src == NULL)
		return (NULL);

	if ((to_code = nl_langinfo(CODESET)) == NULL)
		return (strdup(src));
	return (ldaptool_convert(src, "UTF-8", (const char *)to_code));
}
168 #endif /* SOLARIS_LDAP_CMD */
170 char *
171 ldaptool_local2UTF8( const char *src )
173 #ifdef SOLARIS_LDAP_CMD
174 char *from_code;
175 if ((from_code = nl_langinfo(CODESET)) == NULL)
176 return (strdup(src));
177 return (ldaptool_convert(src, (const char *)from_code, "UTF-8"));
178 #else
179 char *utf8;
180 charsetset = 0;
181 if (src == NULL)
183 return NULL;
185 utf8 = strdup(src);
186 return ( utf8 );
187 #endif /* SOLARIS_LDAP_CMD */
190 #else /* HAVE_LIBICU */
192 #include "unicode/utypes.h"
193 #include "unicode/ucnv.h"
195 #define NSPR20
197 #ifdef XP_WIN32
198 #define VC_EXTRALEAN
199 #include <afxwin.h>
200 #include <winnls.h>
201 #endif
/* Character set name selected by the caller; defined in another file. */
extern char *ldaptool_charset;
/* Set to 1 once ldaptool_local2UTF8() has resolved ldaptool_charset. */
static int charsetset = 0;

/* C linkage for the symbols shared with the rest of the tools. */
#ifdef __cplusplus
extern "C" {
#endif
char *ldaptool_convdir = NULL;
char *ldaptool_local2UTF8( const char * );
#ifdef __cplusplus
}
#endif
211 #ifndef XP_WIN32
212 char * GetNormalizedLocaleName(void);
/*
 * Select the LC_CTYPE locale from the environment and return a malloc'ed
 * copy of its name, or of "C" when setlocale() fails or yields an empty
 * name.  Returns NULL only if strdup() fails.  The caller must free() it.
 *
 * With _HPUX_SOURCE, a name that starts with "/\x03:" and ends with ";/"
 * has that 3-byte prefix and 2-byte suffix removed.
 */
char *
GetNormalizedLocaleName(void)
{
    const char *name;
#ifdef _HPUX_SOURCE
    size_t len;
    char *copy;
#endif

    name = setlocale(LC_CTYPE, "");
    if (name == NULL || *name == '\0') {
        name = "C";
    }

#ifdef _HPUX_SOURCE
    len = strlen(name);

    /* The prefix test runs first, so len >= 3 before name[len - 2] is read. */
    if (strncmp(name, "/\x03:", 3) == 0 &&
        strcmp(&name[len - 2], ";/") == 0) {
        name += 3;
        len -= 5;
    }

    copy = strdup(name);
    if (copy != NULL) {
        copy[len] = '\0';   /* cut off the trailing ";/" when it was present */
    }
    return copy;
#else
    return strdup(name);
#endif
}
258 #if defined(IRIX)
/*
 * IRIX locale-name to charset table.  Each entry has the form
 * "<locale>: <charset>"; entries starting with '!' are comments.
 * The list is terminated by NULL.
 */
const char *CHARCONVTABLE[] =
{
    "! This table maps the host's locale names to IANA charsets",
    "!",
    "C: ISO_8859-1:1987",
    "cs: ISO_8859-2:1987",
    "da: ISO_8859-1:1987",
    "de: ISO_8859-1:1987",
    "de_AT: ISO_8859-1:1987",
    "de_CH: ISO_8859-1:1987",
    "en: ISO_8859-1:1987",
    "en_AU: ISO_8859-1:1987",
    "en_CA: ISO_8859-1:1987",
    "en_TH: ISO_8859-1:1987",
    "en_US: ISO_8859-1:1987",
    "es: ISO_8859-1:1987",
    "fi: ISO_8859-1:1987",
    "fr: ISO_8859-1:1987",
    "fr_BE: ISO_8859-1:1987",
    "fr_CA: ISO_8859-1:1987",
    "fr_CH: ISO_8859-1:1987",
    "is: ISO_8859-1:1987",
    "it: ISO_8859-1:1987",
    "it_CH: ISO_8859-1:1987",
    "ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ko_KR.euc: EUC-KR",
    "nl: ISO_8859-1:1987",
    "nl_BE: ISO_8859-1:1987",
    "no: ISO_8859-1:1987",
    "pl: ISO_8859-2:1987",
    "pt: ISO_8859-1:1987",
    "sh: ISO_8859-2:1987",
    "sk: ISO_8859-2:1987",
    "sv: ISO_8859-1:1987",
    "zh_CN.ugb: GB2312",
    "zh_TW.ucns: cns11643_1",
    NULL
};
297 #elif defined(SOLARIS)
/*
 * Solaris locale-name to charset table.  Each entry has the form
 * "<locale>: <charset>"; entries starting with '!' are comments.
 * The list is terminated by NULL.
 */
const char *CHARCONVTABLE[] =
{
    "! This table maps the host's locale names to IANA charsets",
    "!",
    "C: ISO_8859-1:1987",
    "ja: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ja_JP.PCK: Shift_JIS",
    "en: ISO_8859-1:1987",
    "en_AU: ISO_8859-1:1987",
    "en_CA: ISO_8859-1:1987",
    "en_UK: ISO_8859-1:1987",
    "en_US: ISO_8859-1:1987",
    "es: ISO_8859-1:1987",
    "es_AR: ISO_8859-1:1987",
    "es_BO: ISO_8859-1:1987",
    "es_CL: ISO_8859-1:1987",
    "es_CO: ISO_8859-1:1987",
    "es_CR: ISO_8859-1:1987",
    "es_EC: ISO_8859-1:1987",
    "es_GT: ISO_8859-1:1987",
    "es_MX: ISO_8859-1:1987",
    "es_NI: ISO_8859-1:1987",
    "es_PA: ISO_8859-1:1987",
    "es_PE: ISO_8859-1:1987",
    "es_PY: ISO_8859-1:1987",
    "es_SV: ISO_8859-1:1987",
    "es_UY: ISO_8859-1:1987",
    "es_VE: ISO_8859-1:1987",
    "fr: ISO_8859-1:1987",
    "fr_BE: ISO_8859-1:1987",
    "fr_CA: ISO_8859-1:1987",
    "fr_CH: ISO_8859-1:1987",
    "de: ISO_8859-1:1987",
    "de_AT: ISO_8859-1:1987",
    "de_CH: ISO_8859-1:1987",
    "nl: ISO_8859-1:1987",
    "nl_BE: ISO_8859-1:1987",
    "it: ISO_8859-1:1987",
    "sv: ISO_8859-1:1987",
    "no: ISO_8859-1:1987",
    "da: ISO_8859-1:1987",
    "iso_8859_1: ISO_8859-1:1987",
    "japanese: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ko: EUC-KR",
    "zh: GB2312",
    "zh_TW: cns11643_1",
    NULL
};
347 #elif defined(OSF1)
/*
 * OSF/1 locale-name to charset table.  Each entry has the form
 * "<locale>: <charset>"; entries starting with '!' are comments.
 * The list is terminated by NULL.
 */
const char *CHARCONVTABLE[] =
{
    "! This table maps the host's locale names to IANA charsets",
    "!",
    "C: ISO_8859-1:1987",
    "cs_CZ.ISO8859-2: ISO_8859-2:1987",
    "cs_CZ: ISO_8859-2:1987",
    "da_DK.ISO8859-1: ISO_8859-1:1987",
    "de_CH.ISO8859-1: ISO_8859-1:1987",
    "de_DE.ISO8859-1: ISO_8859-1:1987",
    "en_GB.ISO8859-1: ISO_8859-1:1987",
    "en_US.ISO8859-1: ISO_8859-1:1987",
    "es_ES.ISO8859-1: ISO_8859-1:1987",
    "fi_FI.ISO8859-1: ISO_8859-1:1987",
    "fr_BE.ISO8859-1: ISO_8859-1:1987",
    "fr_CA.ISO8859-1: ISO_8859-1:1987",
    "fr_CH.ISO8859-1: ISO_8859-1:1987",
    "fr_FR.ISO8859-1: ISO_8859-1:1987",
    "hu_HU.ISO8859-2: ISO_8859-2:1987",
    "hu_HU: ISO_8859-2:1987",
    "is_IS.ISO8859-1: ISO_8859-1:1987",
    "it_IT.ISO8859-1: ISO_8859-1:1987",
    "ja_JP.SJIS: Shift_JIS",
    "ja_JP.eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ko_KR.eucKR: EUC-KR",
    "ko_KR: EUC-KR",
    "nl_BE.ISO8859-1: ISO_8859-1:1987",
    "nl_NL.ISO8859-1: ISO_8859-1:1987",
    "no_NO.ISO8859-1: ISO_8859-1:1987",
    "pl_PL.ISO8859-2: ISO_8859-2:1987",
    "pl_PL: ISO_8859-2:1987",
    "pt_PT.ISO8859-1: ISO_8859-1:1987",
    "sk_SK.ISO8859-2: ISO_8859-2:1987",
    "sk_SK: ISO_8859-2:1987",
    "sv_SE.ISO8859-1: ISO_8859-1:1987",
    "zh_CN: GB2312",
    "zh_HK.big5: Big5",
    "zh_HK.eucTW: cns11643_1",
    "zh_TW.big5: Big5",
    "zh_TW.big5@chuyin: Big5",
    "zh_TW.big5@radical: Big5",
    "zh_TW.big5@stroke: Big5",
    "zh_TW.eucTW: cns11643_1",
    "zh_TW.eucTW@chuyin: cns11643_1",
    "zh_TW.eucTW@radical: cns11643_1",
    "zh_TW.eucTW@stroke: cns11643_1",
    "zh_TW: cns11643_1",
    NULL
};
398 #elif defined(HPUX)
/*
 * HP-UX locale-name to charset table.  Each entry has the form
 * "<locale>: <charset>"; entries starting with '!' are comments.
 * The list is terminated by NULL.
 */
const char *CHARCONVTABLE[] =
{
    "! This table maps the host's locale names to IANA charsets",
    "!",
    "C: ISO_8859-1:1987",
    "ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ja_JP.SJIS: Shift_JIS",
    "ja_JP.eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "es_ES: ISO_8859-1:1987",
    "es_ES.iso88591: ISO_8859-1:1987",
    "sv_SE: ISO_8859-1:1987",
    "sv_SE.iso88591: ISO_8859-1:1987",
    "da_DK: ISO_8859-1:1987",
    "da_DK.iso88591: ISO_8859-1:1987",
    "nl_NL: ISO_8859-1:1987",
    "nl_NL.iso88591: ISO_8859-1:1987",
    "en: ISO_8859-1:1987",
    "en_GB: ISO_8859-1:1987",
    "en_GB.iso88591: ISO_8859-1:1987",
    "en_US: ISO_8859-1:1987",
    "en_US.iso88591: ISO_8859-1:1987",
    "fi_FI: ISO_8859-1:1987",
    "fi_FI.iso88591: ISO_8859-1:1987",
    "fr_CA: ISO_8859-1:1987",
    "fr_CA.iso88591: ISO_8859-1:1987",
    "fr_FR: ISO_8859-1:1987",
    "fr_FR.iso88591: ISO_8859-1:1987",
    "de_DE: ISO_8859-1:1987",
    "de_DE.iso88591: ISO_8859-1:1987",
    "is_IS: ISO_8859-1:1987",
    "is_IS.iso88591: ISO_8859-1:1987",
    "it_IT: ISO_8859-1:1987",
    "it_IT.iso88591: ISO_8859-1:1987",
    "no_NO: ISO_8859-1:1987",
    "no_NO.iso88591: ISO_8859-1:1987",
    "pt_PT: ISO_8859-1:1987",
    "pt_PT.iso88591: ISO_8859-1:1987",
    "hu_HU: ISO_8859-2:1987",
    "hu_HU.iso88592: ISO_8859-2:1987",
    "cs_CZ: ISO_8859-2:1987",
    "cs_CZ.iso88592: ISO_8859-2:1987",
    "pl_PL: ISO_8859-2:1987",
    "pl_PL.iso88592: ISO_8859-2:1987",
    "ro_RO: ISO_8859-2:1987",
    "ro_RO.iso88592: ISO_8859-2:1987",
    "hr_HR: ISO_8859-2:1987",
    "hr_HR.iso88592: ISO_8859-2:1987",
    "sk_SK: ISO_8859-2:1987",
    "sk_SK.iso88592: ISO_8859-2:1987",
    "sl_SI: ISO_8859-2:1987",
    "sl_SI.iso88592: ISO_8859-2:1987",
    "american.iso88591: ISO_8859-1:1987",
    "bulgarian: ISO_8859-2:1987",
    "c-french.iso88591: ISO_8859-1:1987",
    "chinese-s: GB2312",
    "chinese-t.big5: Big5",
    "czech: ISO_8859-2:1987",
    "danish.iso88591: ISO_8859-1:1987",
    "dutch.iso88591: ISO_8859-1:1987",
    "english.iso88591: ISO_8859-1:1987",
    "finnish.iso88591: ISO_8859-1:1987",
    "french.iso88591: ISO_8859-1:1987",
    "german.iso88591: ISO_8859-1:1987",
    "hungarian: ISO_8859-2:1987",
    "icelandic.iso88591: ISO_8859-1:1987",
    "italian.iso88591: ISO_8859-1:1987",
    "japanese.euc: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "japanese: Shift_JIS",
    "katakana: Shift_JIS",
    "korean: EUC-KR",
    "norwegian.iso88591: ISO_8859-1:1987",
    "polish: ISO_8859-2:1987",
    "portuguese.iso88591: ISO_8859-1:1987",
    "rumanian: ISO_8859-2:1987",
    "serbocroatian: ISO_8859-2:1987",
    "slovene: ISO_8859-2:1987",
    "spanish.iso88591: ISO_8859-1:1987",
    "swedish.iso88591: ISO_8859-1:1987",
    NULL
};
479 #elif defined(AIX)
/*
 * AIX locale-name to charset table.  Each entry has the form
 * "<locale>: <charset>"; entries starting with '!' are comments.
 * The list is terminated by NULL.
 */
const char *CHARCONVTABLE[] =
{
    "! This table maps the host's locale names to IANA charsets",
    "!",
    "C: ISO_8859-1:1987",
    "En_JP.IBM-932: Shift_JIS",
    "En_JP: Shift_JIS",
    "Ja_JP.IBM-932: Shift_JIS",
    "Ja_JP: Shift_JIS",
    "da_DK.ISO8859-1: ISO_8859-1:1987",
    "da_DK: ISO_8859-1:1987",
    "de_CH.ISO8859-1: ISO_8859-1:1987",
    "de_CH: ISO_8859-1:1987",
    "de_DE.ISO8859-1: ISO_8859-1:1987",
    "de_DE: ISO_8859-1:1987",
    "en_GB.ISO8859-1: ISO_8859-1:1987",
    "en_GB: ISO_8859-1:1987",
    "en_JP.IBM-eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "en_JP: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "en_KR.IBM-eucKR: EUC-KR",
    "en_KR: EUC-KR",
    "en_TW.IBM-eucTW: cns11643_1",
    "en_TW: cns11643_1",
    "en_US.ISO8859-1: ISO_8859-1:1987",
    "en_US: ISO_8859-1:1987",
    "es_ES.ISO8859-1: ISO_8859-1:1987",
    "es_ES: ISO_8859-1:1987",
    "fi_FI.ISO8859-1: ISO_8859-1:1987",
    "fi_FI: ISO_8859-1:1987",
    "fr_BE.ISO8859-1: ISO_8859-1:1987",
    "fr_BE: ISO_8859-1:1987",
    "fr_CA.ISO8859-1: ISO_8859-1:1987",
    "fr_CA: ISO_8859-1:1987",
    "fr_CH.ISO8859-1: ISO_8859-1:1987",
    "fr_CH: ISO_8859-1:1987",
    "fr_FR.ISO8859-1: ISO_8859-1:1987",
    "fr_FR: ISO_8859-1:1987",
    "is_IS.ISO8859-1: ISO_8859-1:1987",
    "is_IS: ISO_8859-1:1987",
    "it_IT.ISO8859-1: ISO_8859-1:1987",
    "it_IT: ISO_8859-1:1987",
    "ja_JP.IBM-eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ko_KR.IBM-eucKR: EUC-KR",
    "ko_KR: EUC-KR",
    "nl_BE.ISO8859-1: ISO_8859-1:1987",
    "nl_BE: ISO_8859-1:1987",
    "nl_NL.ISO8859-1: ISO_8859-1:1987",
    "nl_NL: ISO_8859-1:1987",
    "no_NO.ISO8859-1: ISO_8859-1:1987",
    "no_NO: ISO_8859-1:1987",
    "pt_PT.ISO8859-1: ISO_8859-1:1987",
    "pt_PT: ISO_8859-1:1987",
    "sv_SE.ISO8859-1: ISO_8859-1:1987",
    "sv_SE: ISO_8859-1:1987",
    "zh_TW.IBM-eucTW: cns11643_1",
    "zh_TW: cns11643_1",
    NULL
};
539 #else // sunos by default
/*
 * Default (SunOS) locale-name to charset table.  Each entry has the form
 * "<locale>: <charset>"; entries starting with '!' are comments.
 * The list is terminated by NULL.
 */
const char *CHARCONVTABLE[] =
{
    "! This table maps the host's locale names to IANA charsets",
    "!",
    "C: ISO_8859-1:1987",
    "de: ISO_8859-1:1987",
    "en_US: ISO_8859-1:1987",
    "es: ISO_8859-1:1987",
    "fr: ISO_8859-1:1987",
    "iso_8859_1: ISO_8859-1:1987",
    "it: ISO_8859-1:1987",
    "ja: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "japanese: Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ko: EUC-KR",
    "sv: ISO_8859-1:1987",
    "zh: GB2312",
    "zh_TW: cns11643_1",
    NULL
};
560 #endif
562 #define BSZ 256
564 char *
565 GetCharsetFromLocale(char *locale)
567 char *tmpcharset = NULL;
568 char buf[BSZ];
569 char *p;
570 const char *line;
571 int i=0;
573 line = CHARCONVTABLE[i];
574 while (line != NULL)
576 if (*line == 0)
578 break;
581 strcpy(buf, line);
582 line = CHARCONVTABLE[++i];
584 if (strlen(buf) == 0 || buf[0] == '!')
586 continue;
588 p = strchr(buf, ':');
589 if (p == NULL)
591 tmpcharset = NULL;
592 break;
594 *p = 0;
595 if (strcmp(buf, locale) == 0) {
596 while (*++p == ' ' || *p == '\t')
598 if (isalpha(*p)) {
599 tmpcharset = strdup(p);
600 } else
601 tmpcharset = NULL;
603 break;
606 return tmpcharset;
609 #endif /* Not defined XP_WIN32 */
611 #ifdef XP_WIN32
612 char *_convertor(const char *instr, int bFromUTF8)
614 char *outstr = NULL;
615 int inlen, wclen, outlen;
616 LPWSTR wcstr;
618 if (instr == NULL)
619 return NULL;
621 if ((inlen = strlen(instr)) <= 0)
622 return NULL;
624 /* output never becomes longer than input,
625 * thus we don't have to ask for the length
627 wcstr = (LPWSTR) malloc( sizeof( WCHAR ) * (inlen+1) );
628 if (!wcstr)
629 return NULL;
631 wclen = MultiByteToWideChar(bFromUTF8 ? CP_UTF8 : CP_ACP, 0, instr,
632 inlen, wcstr, inlen);
633 outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr,
634 wclen, NULL, 0, NULL, NULL);
636 if (outlen > 0) {
637 outstr = (char *) malloc(outlen + 2);
638 outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr,
639 wclen, outstr, outlen, NULL, NULL);
640 if (outlen > 0)
641 *(outstr+outlen) = _T('\0');
642 else
643 return NULL;
645 free( wcstr );
646 return outstr;
648 #endif
650 char *
651 ldaptool_local2UTF8( const char *src )
653 char *utf8;
654 #ifndef XP_WIN32
655 char *locale, *newcharset;
656 size_t outLen, resultLen;
657 UErrorCode err = U_ZERO_ERROR;
658 UConverter *cnv;
660 if (src == NULL)
662 return NULL;
664 else if (*src == 0 || (ldaptool_charset == NULL)
665 || (!strcmp( ldaptool_charset, "" )))
667 /* no option specified, so assume it's already in utf-8 */
668 utf8 = strdup(src);
669 return utf8;
672 if( !strcmp( ldaptool_charset, "0" )
673 && (!charsetset) )
675 /* zero option specified, so try to get default codepage
676 this sucker is strdup'd immediately so it's OK to cast */
677 newcharset = (char *)ucnv_getDefaultName();
678 if (newcharset != NULL) {
679 free( ldaptool_charset );
680 /* the default codepage lives in ICU */
681 ldaptool_charset = strdup(newcharset);
682 if (ldaptool_charset == NULL) {
683 return strdup(src);
686 charsetset = 1;
688 else
689 if( strcmp( ldaptool_charset, "" ) && (!charsetset) )
691 /* -i option specified with charset name */
692 charsetset = 1;
695 /* do the preflight - get the size needed for the target buffer */
696 outLen = (size_t) ucnv_convert( "utf-8", ldaptool_charset, NULL, 0, src,
697 strlen( src ) * sizeof(char), &err);
699 if ((err != U_BUFFER_OVERFLOW_ERROR) || (outLen == 0)) {
700 /* default to just a copy of the string - this covers
701 the case of an illegal charset also */
702 return strdup(src);
705 utf8 = (char *) malloc( outLen + 1);
706 if( utf8 == NULL ) {
707 /* if we're already out of memory, does strdup just return NULL? */
708 return strdup(src);
711 /* do the actual conversion this time */
712 err = U_ZERO_ERROR;
713 resultLen = ucnv_convert( "utf-8", ldaptool_charset, utf8, (outLen + 1), src,
714 strlen(src) * sizeof(char), &err );
716 if (!U_SUCCESS(err)) {
717 free(utf8);
718 return strdup(src);
721 #else
722 utf8 = _convertor(src, FALSE);
723 if( utf8 == NULL )
724 utf8 = strdup(src);
725 #endif
727 return utf8;
729 #endif /* HAVE_LIBICU */
731 #ifndef HAVE_LIBICU
732 #ifdef __cplusplus
734 #endif
735 #endif