Replace FSF snail-mail address with URL.
[libiconv.git] / lib / iconv.c
blob93dc42fb5fe397e4a0ddf9489141cda9991e4b07
1 /*
2 * Copyright (C) 1999-2008, 2011 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <http://www.gnu.org/licenses/>.
20 #include <iconv.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include "config.h"
25 #include "localcharset.h"
27 #ifdef __CYGWIN__
28 #include <cygwin/version.h>
29 #endif
/*
 * Select which groups of system dependent encodings get compiled in.
 * The USE_xxx macros defined here are tested by the conditional
 * #include blocks further down in this file.
 */
#if ENABLE_EXTRA
/*
 * Consider all system dependent encodings, for any system,
 * and the extra encodings.
 */
#define USE_AIX
#define USE_OSF1
#define USE_DOS
#define USE_EXTRA
#else
/*
 * Consider those system dependent encodings that are needed for the
 * current system.
 */
#ifdef _AIX
#define USE_AIX
#endif
#if defined(__osf__) || defined(VMS)
#define USE_OSF1
#endif
#if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
#define USE_DOS
#endif
#endif
57 * Data type for general conversion loop.
59 struct loop_funcs {
60 size_t (*loop_convert) (iconv_t icd,
61 const char* * inbuf, size_t *inbytesleft,
62 char* * outbuf, size_t *outbytesleft);
63 size_t (*loop_reset) (iconv_t icd,
64 char* * outbuf, size_t *outbytesleft);
68 * Converters.
70 #include "converters.h"
73 * Transliteration tables.
75 #include "cjk_variants.h"
76 #include "translit.h"
79 * Table of all supported encodings.
81 struct encoding {
82 struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
83 struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
84 int oflags; /* flags for unicode -> multibyte conversion */
86 #define DEFALIAS(xxx_alias,xxx) /* nothing */
87 enum {
88 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
89 ei_##xxx ,
90 #include "encodings.def"
91 #ifdef USE_AIX
92 # include "encodings_aix.def"
93 #endif
94 #ifdef USE_OSF1
95 # include "encodings_osf1.def"
96 #endif
97 #ifdef USE_DOS
98 # include "encodings_dos.def"
99 #endif
100 #ifdef USE_EXTRA
101 # include "encodings_extra.def"
102 #endif
103 #include "encodings_local.def"
104 #undef DEFENCODING
105 ei_for_broken_compilers_that_dont_like_trailing_commas
107 #include "flags.h"
108 static struct encoding const all_encodings[] = {
109 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
110 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
111 #include "encodings.def"
112 #ifdef USE_AIX
113 # include "encodings_aix.def"
114 #endif
115 #ifdef USE_OSF1
116 # include "encodings_osf1.def"
117 #endif
118 #ifdef USE_DOS
119 # include "encodings_dos.def"
120 #endif
121 #ifdef USE_EXTRA
122 # include "encodings_extra.def"
123 #endif
124 #undef DEFENCODING
125 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
126 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
127 #include "encodings_local.def"
128 #undef DEFENCODING
130 #undef DEFALIAS
133 * Conversion loops.
135 #include "loops.h"
138 * Alias lookup function.
139 * Defines
140 * struct alias { int name; unsigned int encoding_index; };
141 * const struct alias * aliases_lookup (const char *str, unsigned int len);
142 * #define MAX_WORD_LENGTH ...
144 #if defined _AIX
145 # include "aliases_sysaix.h"
146 #elif defined hpux || defined __hpux
147 # include "aliases_syshpux.h"
148 #elif defined __osf__
149 # include "aliases_sysosf1.h"
150 #elif defined __sun
151 # include "aliases_syssolaris.h"
152 #else
153 # include "aliases.h"
154 #endif
/*
 * System dependent alias lookup function.
 * Defines
 *   const struct alias * aliases2_lookup (const char *str);
 *
 * When at least one USE_xxx group is enabled, the names listed in
 * "aliases2.h" are laid out back to back in a single struct
 * (stringpool2_contents), and sysdep_aliases[] stores, for each name, its
 * byte offset inside that struct together with its encoding index.
 * Otherwise aliases2_lookup and stringpool2 are macros that expand to NULL.
 */
#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
/* One char array member per alias, sized to hold the name and its NUL. */
struct stringpool2_t {
#define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
#include "aliases2.h"
#undef S
};
/* The alias names themselves, in the order of the struct members. */
static const struct stringpool2_t stringpool2_contents = {
#define S(tag,name,encoding_index) name,
#include "aliases2.h"
#undef S
};
#define stringpool2 ((const char *) &stringpool2_contents)
/* Offset of each name within stringpool2, paired with its encoding index. */
static const struct alias sysdep_aliases[] = {
#define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index },
#include "aliases2.h"
#undef S
};
/*
 * Linear search of sysdep_aliases[] for an entry whose name equals STR
 * (exact strcmp match).  Returns the entry, or NULL if there is none.
 */
#ifdef __GNUC__
__inline
#endif
const struct alias *
aliases2_lookup (register const char *str)
{
  const struct alias * ptr;
  unsigned int count;
  for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--)
    if (!strcmp(str, stringpool2 + ptr->name))
      return ptr;
  return NULL;
}
#else
#define aliases2_lookup(str)  NULL
#define stringpool2  NULL
#endif
196 #if 0
/* Like !strcasecmp, except that the both strings can be assumed to be ASCII
   and the first string can be assumed to be in uppercase.
   Returns 1 if STR1 equals STR2 after folding the ASCII lowercase letters
   of STR2 to uppercase, 0 otherwise.  STR1 is compared as is.  */
static int strequal (const char* str1, const char* str2)
{
  unsigned char c1;
  unsigned char c2;
  for (;;) {
    /* Read bytes as unsigned without casting away const. */
    c1 = * (const unsigned char *) str1++;
    c2 = * (const unsigned char *) str2++;
    if (c1 == 0)
      break;
    if (c2 >= 'a' && c2 <= 'z')
      c2 -= 'a'-'A';
    if (c1 != c2)
      break;
  }
  /* Equal only if both strings ended at the same position. */
  return (c1 == c2);
}
215 #endif
217 iconv_t iconv_open (const char* tocode, const char* fromcode)
219 struct conv_struct * cd;
220 unsigned int from_index;
221 int from_wchar;
222 unsigned int to_index;
223 int to_wchar;
224 int transliterate;
225 int discard_ilseq;
227 #include "iconv_open1.h"
229 cd = (struct conv_struct *) malloc(from_wchar != to_wchar
230 ? sizeof(struct wchar_conv_struct)
231 : sizeof(struct conv_struct));
232 if (cd == NULL) {
233 errno = ENOMEM;
234 return (iconv_t)(-1);
237 #include "iconv_open2.h"
239 return (iconv_t)cd;
240 invalid:
241 errno = EINVAL;
242 return (iconv_t)(-1);
245 size_t iconv (iconv_t icd,
246 ICONV_CONST char* * inbuf, size_t *inbytesleft,
247 char* * outbuf, size_t *outbytesleft)
249 conv_t cd = (conv_t) icd;
250 if (inbuf == NULL || *inbuf == NULL)
251 return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
252 else
253 return cd->lfuncs.loop_convert(icd,
254 (const char* *)inbuf,inbytesleft,
255 outbuf,outbytesleft);
258 int iconv_close (iconv_t icd)
260 conv_t cd = (conv_t) icd;
261 free(cd);
262 return 0;
265 #ifndef LIBICONV_PLUG
268 * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
269 * fit in an iconv_allocation_t.
270 * If this verification fails, iconv_allocation_t must be made larger and
271 * the major version in LIBICONV_VERSION_INFO must be bumped.
272 * Currently 'struct conv_struct' has 21 integer/pointer fields, and
273 * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
275 typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
276 typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
278 int iconv_open_into (const char* tocode, const char* fromcode,
279 iconv_allocation_t* resultp)
281 struct conv_struct * cd;
282 unsigned int from_index;
283 int from_wchar;
284 unsigned int to_index;
285 int to_wchar;
286 int transliterate;
287 int discard_ilseq;
289 #include "iconv_open1.h"
291 cd = (struct conv_struct *) resultp;
293 #include "iconv_open2.h"
295 return 0;
296 invalid:
297 errno = EINVAL;
298 return -1;
301 int iconvctl (iconv_t icd, int request, void* argument)
303 conv_t cd = (conv_t) icd;
304 switch (request) {
305 case ICONV_TRIVIALP:
306 *(int *)argument =
307 ((cd->lfuncs.loop_convert == unicode_loop_convert
308 && cd->iindex == cd->oindex)
309 || cd->lfuncs.loop_convert == wchar_id_loop_convert
310 ? 1 : 0);
311 return 0;
312 case ICONV_GET_TRANSLITERATE:
313 *(int *)argument = cd->transliterate;
314 return 0;
315 case ICONV_SET_TRANSLITERATE:
316 cd->transliterate = (*(const int *)argument ? 1 : 0);
317 return 0;
318 case ICONV_GET_DISCARD_ILSEQ:
319 *(int *)argument = cd->discard_ilseq;
320 return 0;
321 case ICONV_SET_DISCARD_ILSEQ:
322 cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
323 return 0;
324 case ICONV_SET_HOOKS:
325 if (argument != NULL) {
326 cd->hooks = *(const struct iconv_hooks *)argument;
327 } else {
328 cd->hooks.uc_hook = NULL;
329 cd->hooks.wc_hook = NULL;
330 cd->hooks.data = NULL;
332 return 0;
333 case ICONV_SET_FALLBACKS:
334 if (argument != NULL) {
335 cd->fallbacks = *(const struct iconv_fallbacks *)argument;
336 } else {
337 cd->fallbacks.mb_to_uc_fallback = NULL;
338 cd->fallbacks.uc_to_mb_fallback = NULL;
339 cd->fallbacks.mb_to_wc_fallback = NULL;
340 cd->fallbacks.wc_to_mb_fallback = NULL;
341 cd->fallbacks.data = NULL;
343 return 0;
344 default:
345 errno = EINVAL;
346 return -1;
/* An alias after its name has been converted from 'int' to 'const char*'. */
struct nalias { const char* name; unsigned int encoding_index; };

/*
 * qsort comparison function: orders 'struct nalias' elements by ascending
 * encoding_index.  Returns a negative value, zero or a positive value.
 *
 * The indices are compared directly rather than subtracted as 'int', so
 * the result has the right sign for every unsigned value and no signed
 * overflow can occur.
 */
static int compare_by_index (const void * arg1, const void * arg2)
{
  const struct nalias * alias1 = (const struct nalias *) arg1;
  const struct nalias * alias2 = (const struct nalias *) arg2;
  return (alias1->encoding_index > alias2->encoding_index)
         - (alias1->encoding_index < alias2->encoding_index);
}
360 static int compare_by_name (const void * arg1, const void * arg2)
362 const char * name1 = *(const char **)arg1;
363 const char * name2 = *(const char **)arg2;
364 /* Compare alphabetically, but put "CS" names at the end. */
365 int sign = strcmp(name1,name2);
366 if (sign != 0) {
367 sign = ((name1[0]=='C' && name1[1]=='S') - (name2[0]=='C' && name2[1]=='S'))
368 * 4 + (sign >= 0 ? 1 : -1);
370 return sign;
373 void iconvlist (int (*do_one) (unsigned int namescount,
374 const char * const * names,
375 void* data),
376 void* data)
378 #define aliascount1 sizeof(aliases)/sizeof(aliases[0])
379 #ifndef aliases2_lookup
380 #define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
381 #else
382 #define aliascount2 0
383 #endif
384 #define aliascount (aliascount1+aliascount2)
385 struct nalias aliasbuf[aliascount];
386 const char * namesbuf[aliascount];
387 size_t num_aliases;
389 /* Put all existing aliases into a buffer. */
390 size_t i;
391 size_t j;
392 j = 0;
393 for (i = 0; i < aliascount1; i++) {
394 const struct alias * p = &aliases[i];
395 if (p->name >= 0
396 && p->encoding_index != ei_local_char
397 && p->encoding_index != ei_local_wchar_t) {
398 aliasbuf[j].name = stringpool + p->name;
399 aliasbuf[j].encoding_index = p->encoding_index;
400 j++;
403 #ifndef aliases2_lookup
404 for (i = 0; i < aliascount2; i++) {
405 aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name;
406 aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index;
407 j++;
409 #endif
410 num_aliases = j;
412 /* Sort by encoding_index. */
413 if (num_aliases > 1)
414 qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
416 /* Process all aliases with the same encoding_index together. */
417 size_t j;
418 j = 0;
419 while (j < num_aliases) {
420 unsigned int ei = aliasbuf[j].encoding_index;
421 size_t i = 0;
423 namesbuf[i++] = aliasbuf[j++].name;
424 while (j < num_aliases && aliasbuf[j].encoding_index == ei);
425 if (i > 1)
426 qsort(namesbuf, i, sizeof(const char *), compare_by_name);
427 /* Call the callback. */
428 if (do_one(i,namesbuf,data))
429 break;
432 #undef aliascount
433 #undef aliascount2
434 #undef aliascount1
438 * Table of canonical names of encodings.
439 * Instead of strings, it contains offsets into stringpool and stringpool2.
441 static const unsigned short all_canonical[] = {
442 #if defined _AIX
443 # include "canonical_sysaix.h"
444 #elif defined hpux || defined __hpux
445 # include "canonical_syshpux.h"
446 #elif defined __osf__
447 # include "canonical_sysosf1.h"
448 #elif defined __sun
449 # include "canonical_syssolaris.h"
450 #else
451 # include "canonical.h"
452 #endif
453 #ifdef USE_AIX
454 # if defined _AIX
455 # include "canonical_aix_sysaix.h"
456 # else
457 # include "canonical_aix.h"
458 # endif
459 #endif
460 #ifdef USE_OSF1
461 # if defined __osf__
462 # include "canonical_osf1_sysosf1.h"
463 # else
464 # include "canonical_osf1.h"
465 # endif
466 #endif
467 #ifdef USE_DOS
468 # include "canonical_dos.h"
469 #endif
470 #ifdef USE_EXTRA
471 # include "canonical_extra.h"
472 #endif
473 #if defined _AIX
474 # include "canonical_local_sysaix.h"
475 #elif defined hpux || defined __hpux
476 # include "canonical_local_syshpux.h"
477 #elif defined __osf__
478 # include "canonical_local_sysosf1.h"
479 #elif defined __sun
480 # include "canonical_local_syssolaris.h"
481 #else
482 # include "canonical_local.h"
483 #endif
486 const char * iconv_canonicalize (const char * name)
488 const char* code;
489 char buf[MAX_WORD_LENGTH+10+1];
490 const char* cp;
491 char* bp;
492 const struct alias * ap;
493 unsigned int count;
494 unsigned int index;
495 const char* pool;
497 /* Before calling aliases_lookup, convert the input string to upper case,
498 * and check whether it's entirely ASCII (we call gperf with option "-7"
499 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
500 * or if it's too long, it is not a valid encoding name.
502 for (code = name;;) {
503 /* Search code in the table. */
504 for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
505 unsigned char c = * (unsigned char *) cp;
506 if (c >= 0x80)
507 goto invalid;
508 if (c >= 'a' && c <= 'z')
509 c -= 'a'-'A';
510 *bp = c;
511 if (c == '\0')
512 break;
513 if (--count == 0)
514 goto invalid;
516 for (;;) {
517 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
518 bp -= 10;
519 *bp = '\0';
520 continue;
522 if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
523 bp -= 8;
524 *bp = '\0';
525 continue;
527 break;
529 if (buf[0] == '\0') {
530 code = locale_charset();
531 /* Avoid an endless loop that could occur when using an older version
532 of localcharset.c. */
533 if (code[0] == '\0')
534 goto invalid;
535 continue;
537 pool = stringpool;
538 ap = aliases_lookup(buf,bp-buf);
539 if (ap == NULL) {
540 pool = stringpool2;
541 ap = aliases2_lookup(buf);
542 if (ap == NULL)
543 goto invalid;
545 if (ap->encoding_index == ei_local_char) {
546 code = locale_charset();
547 /* Avoid an endless loop that could occur when using an older version
548 of localcharset.c. */
549 if (code[0] == '\0')
550 goto invalid;
551 continue;
553 if (ap->encoding_index == ei_local_wchar_t) {
554 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
555 This is also the case on native Woe32 systems and Cygwin >= 1.7, where
556 we know that it is UTF-16. */
557 #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
558 if (sizeof(wchar_t) == 4) {
559 index = ei_ucs4internal;
560 break;
562 if (sizeof(wchar_t) == 2) {
563 # if WORDS_LITTLEENDIAN
564 index = ei_utf16le;
565 # else
566 index = ei_utf16be;
567 # endif
568 break;
570 #elif __STDC_ISO_10646__
571 if (sizeof(wchar_t) == 4) {
572 index = ei_ucs4internal;
573 break;
575 if (sizeof(wchar_t) == 2) {
576 index = ei_ucs2internal;
577 break;
579 if (sizeof(wchar_t) == 1) {
580 index = ei_iso8859_1;
581 break;
583 #endif
585 index = ap->encoding_index;
586 break;
588 return all_canonical[index] + pool;
589 invalid:
590 return name;
593 int _libiconv_version = _LIBICONV_VERSION;
#if defined __FreeBSD__ && !defined __gnu_freebsd__
/* GNU libiconv is the native FreeBSD iconv implementation since 2002.
   It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'.
   The two-level macro lets the arguments be macro-expanded before
   '#name' turns the target into a string.  */
#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
#define _strong_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
/* Drop any macro definitions of these names so that the plain symbols can
   be declared below. */
#undef iconv_open
#undef iconv
#undef iconv_close
strong_alias (libiconv_open, iconv_open)
strong_alias (libiconv, iconv)
strong_alias (libiconv_close, iconv_close)
#endif
609 #endif