2 * Copyright (C) 1999-2007 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
26 #include "localcharset.h"
30 * Consider all system dependent encodings, for any system,
31 * and the extra encodings.
39 * Consider those system dependent encodings that are needed for the
45 #if defined(__osf__) || defined(VMS)
48 #if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
54 * Data type for general conversion loop.
57 size_t (*loop_convert
) (iconv_t icd
,
58 const char* * inbuf
, size_t *inbytesleft
,
59 char* * outbuf
, size_t *outbytesleft
);
60 size_t (*loop_reset
) (iconv_t icd
,
61 char* * outbuf
, size_t *outbytesleft
);
67 #include "converters.h"
70 * Transliteration tables.
72 #include "cjk_variants.h"
76 * Table of all supported encodings.
79 struct mbtowc_funcs ifuncs
; /* conversion multibyte -> unicode */
80 struct wctomb_funcs ofuncs
; /* conversion unicode -> multibyte */
81 int oflags
; /* flags for unicode -> multibyte conversion */
84 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
86 #include "encodings.def"
88 #include "encodings_aix.def"
91 #include "encodings_osf1.def"
94 #include "encodings_dos.def"
97 #include "encodings_extra.def"
99 #include "encodings_local.def"
101 ei_for_broken_compilers_that_dont_like_trailing_commas
104 static struct encoding
const all_encodings
[] = {
105 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
106 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
107 #include "encodings.def"
109 #include "encodings_aix.def"
112 #include "encodings_osf1.def"
115 #include "encodings_dos.def"
118 #include "encodings_extra.def"
121 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
122 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
123 #include "encodings_local.def"
133 * Alias lookup function.
135 * struct alias { int name; unsigned int encoding_index; };
136 * const struct alias * aliases_lookup (const char *str, unsigned int len);
137 * #define MAX_WORD_LENGTH ...
142 * System dependent alias lookup function.
144 * const struct alias * aliases2_lookup (const char *str);
146 #if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
147 struct stringpool2_t
{
148 #define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
149 #include "aliases2.h"
152 static const struct stringpool2_t stringpool2_contents
= {
153 #define S(tag,name,encoding_index) name,
154 #include "aliases2.h"
157 #define stringpool2 ((const char *) &stringpool2_contents)
158 static const struct alias sysdep_aliases
[] = {
159 #define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index },
160 #include "aliases2.h"
163 #if defined(__GNUC__) && !defined(DEBUG)
167 aliases2_lookup (register const char *str
)
169 const struct alias
* ptr
;
171 for (ptr
= sysdep_aliases
, count
= sizeof(sysdep_aliases
)/sizeof(sysdep_aliases
[0]); count
> 0; ptr
++, count
--)
172 if (!strcmp(str
, stringpool2
+ ptr
->name
))
177 #define aliases2_lookup(str) NULL
178 #define stringpool2 NULL
182 /* Like !strcasecmp, except that the both strings can be assumed to be ASCII
183 and the first string can be assumed to be in uppercase. */
184 static int strequal (const char* str1
, const char* str2
)
189 c1
= * (unsigned char *) str1
++;
190 c2
= * (unsigned char *) str2
++;
193 if (c2
>= 'a' && c2
<= 'z')
202 iconv_t
iconv_open (const char* tocode
, const char* fromcode
)
204 struct conv_struct
* cd
;
205 char buf
[MAX_WORD_LENGTH
+10+1];
208 const struct alias
* ap
;
210 unsigned int from_index
;
212 unsigned int to_index
;
214 int transliterate
= 0;
215 int discard_ilseq
= 0;
217 /* Before calling aliases_lookup, convert the input string to upper case,
218 * and check whether it's entirely ASCII (we call gperf with option "-7"
219 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
220 * or if it's too long, it is not a valid encoding name.
222 for (to_wchar
= 0;;) {
223 /* Search tocode in the table. */
224 for (cp
= tocode
, bp
= buf
, count
= MAX_WORD_LENGTH
+10+1; ; cp
++, bp
++) {
225 unsigned char c
= * (unsigned char *) cp
;
228 if (c
>= 'a' && c
<= 'z')
237 if (bp
-buf
>= 10 && memcmp(bp
-10,"//TRANSLIT",10)==0) {
243 if (bp
-buf
>= 8 && memcmp(bp
-8,"//IGNORE",8)==0) {
251 if (buf
[0] == '\0') {
252 tocode
= locale_charset();
253 /* Avoid an endless loop that could occur when using an older version
254 of localcharset.c. */
255 if (tocode
[0] == '\0')
259 ap
= aliases_lookup(buf
,bp
-buf
);
261 ap
= aliases2_lookup(buf
);
265 if (ap
->encoding_index
== ei_local_char
) {
266 tocode
= locale_charset();
267 /* Avoid an endless loop that could occur when using an older version
268 of localcharset.c. */
269 if (tocode
[0] == '\0')
273 if (ap
->encoding_index
== ei_local_wchar_t
) {
274 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
275 This is also the case on native Woe32 systems. */
276 #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
277 if (sizeof(wchar_t) == 4) {
278 to_index
= ei_ucs4internal
;
281 if (sizeof(wchar_t) == 2) {
282 to_index
= ei_ucs2internal
;
285 if (sizeof(wchar_t) == 1) {
286 to_index
= ei_iso8859_1
;
292 tocode
= locale_charset();
297 to_index
= ap
->encoding_index
;
300 for (from_wchar
= 0;;) {
301 /* Search fromcode in the table. */
302 for (cp
= fromcode
, bp
= buf
, count
= MAX_WORD_LENGTH
+10+1; ; cp
++, bp
++) {
303 unsigned char c
= * (unsigned char *) cp
;
306 if (c
>= 'a' && c
<= 'z')
315 if (bp
-buf
>= 10 && memcmp(bp
-10,"//TRANSLIT",10)==0) {
320 if (bp
-buf
>= 8 && memcmp(bp
-8,"//IGNORE",8)==0) {
327 if (buf
[0] == '\0') {
328 fromcode
= locale_charset();
329 /* Avoid an endless loop that could occur when using an older version
330 of localcharset.c. */
331 if (fromcode
[0] == '\0')
335 ap
= aliases_lookup(buf
,bp
-buf
);
337 ap
= aliases2_lookup(buf
);
341 if (ap
->encoding_index
== ei_local_char
) {
342 fromcode
= locale_charset();
343 /* Avoid an endless loop that could occur when using an older version
344 of localcharset.c. */
345 if (fromcode
[0] == '\0')
349 if (ap
->encoding_index
== ei_local_wchar_t
) {
350 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
351 This is also the case on native Woe32 systems. */
352 #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
353 if (sizeof(wchar_t) == 4) {
354 from_index
= ei_ucs4internal
;
357 if (sizeof(wchar_t) == 2) {
358 from_index
= ei_ucs2internal
;
361 if (sizeof(wchar_t) == 1) {
362 from_index
= ei_iso8859_1
;
368 fromcode
= locale_charset();
373 from_index
= ap
->encoding_index
;
376 cd
= (struct conv_struct
*) malloc(from_wchar
!= to_wchar
377 ? sizeof(struct wchar_conv_struct
)
378 : sizeof(struct conv_struct
));
381 return (iconv_t
)(-1);
383 cd
->iindex
= from_index
;
384 cd
->ifuncs
= all_encodings
[from_index
].ifuncs
;
385 cd
->oindex
= to_index
;
386 cd
->ofuncs
= all_encodings
[to_index
].ofuncs
;
387 cd
->oflags
= all_encodings
[to_index
].oflags
;
388 /* Initialize the loop functions. */
393 cd
->lfuncs
.loop_convert
= wchar_id_loop_convert
;
394 cd
->lfuncs
.loop_reset
= wchar_id_loop_reset
;
398 cd
->lfuncs
.loop_convert
= wchar_to_loop_convert
;
399 cd
->lfuncs
.loop_reset
= wchar_to_loop_reset
;
406 cd
->lfuncs
.loop_convert
= wchar_from_loop_convert
;
407 cd
->lfuncs
.loop_reset
= wchar_from_loop_reset
;
411 cd
->lfuncs
.loop_convert
= unicode_loop_convert
;
412 cd
->lfuncs
.loop_reset
= unicode_loop_reset
;
415 /* Initialize the states. */
416 memset(&cd
->istate
,'\0',sizeof(state_t
));
417 memset(&cd
->ostate
,'\0',sizeof(state_t
));
418 /* Initialize the operation flags. */
419 cd
->transliterate
= transliterate
;
420 cd
->discard_ilseq
= discard_ilseq
;
421 #ifndef LIBICONV_PLUG
422 cd
->fallbacks
.mb_to_uc_fallback
= NULL
;
423 cd
->fallbacks
.uc_to_mb_fallback
= NULL
;
424 cd
->fallbacks
.mb_to_wc_fallback
= NULL
;
425 cd
->fallbacks
.wc_to_mb_fallback
= NULL
;
426 cd
->fallbacks
.data
= NULL
;
427 cd
->hooks
.uc_hook
= NULL
;
428 cd
->hooks
.wc_hook
= NULL
;
429 cd
->hooks
.data
= NULL
;
431 /* Initialize additional fields. */
432 if (from_wchar
!= to_wchar
) {
433 struct wchar_conv_struct
* wcd
= (struct wchar_conv_struct
*) cd
;
434 memset(&wcd
->state
,'\0',sizeof(mbstate_t));
440 return (iconv_t
)(-1);
443 size_t iconv (iconv_t icd
,
444 ICONV_CONST
char* * inbuf
, size_t *inbytesleft
,
445 char* * outbuf
, size_t *outbytesleft
)
447 conv_t cd
= (conv_t
) icd
;
448 if (inbuf
== NULL
|| *inbuf
== NULL
)
449 return cd
->lfuncs
.loop_reset(icd
,outbuf
,outbytesleft
);
451 return cd
->lfuncs
.loop_convert(icd
,
452 (const char* *)inbuf
,inbytesleft
,
453 outbuf
,outbytesleft
);
456 int iconv_close (iconv_t icd
)
458 conv_t cd
= (conv_t
) icd
;
463 #ifndef LIBICONV_PLUG
465 int iconvctl (iconv_t icd
, int request
, void* argument
)
467 conv_t cd
= (conv_t
) icd
;
471 ((cd
->lfuncs
.loop_convert
== unicode_loop_convert
472 && cd
->iindex
== cd
->oindex
)
473 || cd
->lfuncs
.loop_convert
== wchar_id_loop_convert
476 case ICONV_GET_TRANSLITERATE
:
477 *(int *)argument
= cd
->transliterate
;
479 case ICONV_SET_TRANSLITERATE
:
480 cd
->transliterate
= (*(const int *)argument
? 1 : 0);
482 case ICONV_GET_DISCARD_ILSEQ
:
483 *(int *)argument
= cd
->discard_ilseq
;
485 case ICONV_SET_DISCARD_ILSEQ
:
486 cd
->discard_ilseq
= (*(const int *)argument
? 1 : 0);
488 case ICONV_SET_HOOKS
:
489 if (argument
!= NULL
) {
490 cd
->hooks
= *(const struct iconv_hooks
*)argument
;
492 cd
->hooks
.uc_hook
= NULL
;
493 cd
->hooks
.wc_hook
= NULL
;
494 cd
->hooks
.data
= NULL
;
497 case ICONV_SET_FALLBACKS
:
498 if (argument
!= NULL
) {
499 cd
->fallbacks
= *(const struct iconv_fallbacks
*)argument
;
501 cd
->fallbacks
.mb_to_uc_fallback
= NULL
;
502 cd
->fallbacks
.uc_to_mb_fallback
= NULL
;
503 cd
->fallbacks
.mb_to_wc_fallback
= NULL
;
504 cd
->fallbacks
.wc_to_mb_fallback
= NULL
;
505 cd
->fallbacks
.data
= NULL
;
514 /* An alias after its name has been converted from 'int' to 'const char*'. */
515 struct nalias
{ const char* name
; unsigned int encoding_index
; };
517 static int compare_by_index (const void * arg1
, const void * arg2
)
519 const struct nalias
* alias1
= (const struct nalias
*) arg1
;
520 const struct nalias
* alias2
= (const struct nalias
*) arg2
;
521 return (int)alias1
->encoding_index
- (int)alias2
->encoding_index
;
524 static int compare_by_name (const void * arg1
, const void * arg2
)
526 const char * name1
= *(const char **)arg1
;
527 const char * name2
= *(const char **)arg2
;
528 /* Compare alphabetically, but put "CS" names at the end. */
529 int sign
= strcmp(name1
,name2
);
531 sign
= ((name1
[0]=='C' && name1
[1]=='S') - (name2
[0]=='C' && name2
[1]=='S'))
532 * 4 + (sign
>= 0 ? 1 : -1);
537 void iconvlist (int (*do_one
) (unsigned int namescount
,
538 const char * const * names
,
542 #define aliascount1 sizeof(aliases)/sizeof(aliases[0])
543 #ifndef aliases2_lookup
544 #define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
546 #define aliascount2 0
548 #define aliascount (aliascount1+aliascount2)
549 struct nalias aliasbuf
[aliascount
];
550 const char * namesbuf
[aliascount
];
553 /* Put all existing aliases into a buffer. */
557 for (i
= 0; i
< aliascount1
; i
++) {
558 const struct alias
* p
= &aliases
[i
];
560 && p
->encoding_index
!= ei_local_char
561 && p
->encoding_index
!= ei_local_wchar_t
) {
562 aliasbuf
[j
].name
= stringpool
+ p
->name
;
563 aliasbuf
[j
].encoding_index
= p
->encoding_index
;
567 #ifndef aliases2_lookup
568 for (i
= 0; i
< aliascount2
; i
++) {
569 aliasbuf
[j
].name
= stringpool2
+ sysdep_aliases
[i
].name
;
570 aliasbuf
[j
].encoding_index
= sysdep_aliases
[i
].encoding_index
;
576 /* Sort by encoding_index. */
578 qsort(aliasbuf
, num_aliases
, sizeof(struct nalias
), compare_by_index
);
580 /* Process all aliases with the same encoding_index together. */
583 while (j
< num_aliases
) {
584 unsigned int ei
= aliasbuf
[j
].encoding_index
;
587 namesbuf
[i
++] = aliasbuf
[j
++].name
;
588 while (j
< num_aliases
&& aliasbuf
[j
].encoding_index
== ei
);
590 qsort(namesbuf
, i
, sizeof(const char *), compare_by_name
);
591 /* Call the callback. */
592 if (do_one(i
,namesbuf
,data
))
602 * Table of canonical names of encodings.
603 * Instead of strings, it contains offsets into stringpool and stringpool2.
605 static const unsigned short all_canonical
[] = {
606 #include "canonical.h"
608 #include "canonical_aix.h"
611 #include "canonical_osf1.h"
614 #include "canonical_dos.h"
617 #include "canonical_extra.h"
619 #include "canonical_local.h"
622 const char * iconv_canonicalize (const char * name
)
625 char buf
[MAX_WORD_LENGTH
+10+1];
628 const struct alias
* ap
;
633 /* Before calling aliases_lookup, convert the input string to upper case,
634 * and check whether it's entirely ASCII (we call gperf with option "-7"
635 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
636 * or if it's too long, it is not a valid encoding name.
638 for (code
= name
;;) {
639 /* Search code in the table. */
640 for (cp
= code
, bp
= buf
, count
= MAX_WORD_LENGTH
+10+1; ; cp
++, bp
++) {
641 unsigned char c
= * (unsigned char *) cp
;
644 if (c
>= 'a' && c
<= 'z')
653 if (bp
-buf
>= 10 && memcmp(bp
-10,"//TRANSLIT",10)==0) {
658 if (bp
-buf
>= 8 && memcmp(bp
-8,"//IGNORE",8)==0) {
665 if (buf
[0] == '\0') {
666 code
= locale_charset();
667 /* Avoid an endless loop that could occur when using an older version
668 of localcharset.c. */
674 ap
= aliases_lookup(buf
,bp
-buf
);
677 ap
= aliases2_lookup(buf
);
681 if (ap
->encoding_index
== ei_local_char
) {
682 code
= locale_charset();
683 /* Avoid an endless loop that could occur when using an older version
684 of localcharset.c. */
689 if (ap
->encoding_index
== ei_local_wchar_t
) {
690 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
691 This is also the case on native Woe32 systems. */
692 #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
693 if (sizeof(wchar_t) == 4) {
694 index
= ei_ucs4internal
;
697 if (sizeof(wchar_t) == 2) {
698 index
= ei_ucs2internal
;
701 if (sizeof(wchar_t) == 1) {
702 index
= ei_iso8859_1
;
707 index
= ap
->encoding_index
;
710 return all_canonical
[index
] + pool
;
715 int _libiconv_version
= _LIBICONV_VERSION
;
717 #if defined __FreeBSD__ && !defined __gnu_freebsd__
718 /* GNU libiconv is the native FreeBSD iconv implementation since 2002.
719 It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'. */
720 #define strong_alias(name, aliasname) _strong_alias(name, aliasname)
721 #define _strong_alias(name, aliasname) \
722 extern __typeof (name) aliasname __attribute__ ((alias (#name)));
726 strong_alias (libiconv_open
, iconv_open
)
727 strong_alias (libiconv
, iconv
)
728 strong_alias (libiconv_close
, iconv_close
)