Support for "iconv -l".
[libiconv.git] / lib / iconv.c
blob9a1cd485eccf6500f381ce78dd882c2e0c464b94
/*
 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
 * Suite 330, Boston, MA 02111-1307, USA.
 */
21 #include <iconv.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include "config.h"
26 #include "libcharset.h"
/*
 * Consider those system dependent encodings that are needed for the
 * current system.
 *
 * Each USE_xxx macro defined here gates the inclusion of the matching
 * encodings_xxx.def and aliases_xxx.h files further down in this file.
 */
#ifdef _AIX
#define USE_AIX
#endif
#ifdef __osf__
#define USE_OSF1
#endif
#ifdef __DJGPP__
#define USE_DOS
#endif
/*
 * Data type for general conversion loop.
 *
 * A conversion descriptor carries one instance of this structure; iconv()
 * dispatches through it.
 */
struct loop_funcs {
  /* Called by iconv() when a non-NULL input buffer is supplied. */
  size_t (*loop_convert) (iconv_t icd,
                          const char* * inbuf, size_t *inbytesleft,
                          char* * outbuf, size_t *outbytesleft);
  /* Called by iconv() when inbuf or *inbuf is NULL. */
  size_t (*loop_reset) (iconv_t icd,
                        char* * outbuf, size_t *outbytesleft);
};
/*
 * Converters.
 */
56 #include "converters.h"
/*
 * Transliteration tables.
 */
61 #include "cjk_variants.h"
62 #include "translit.h"
65 * Table of all supported encodings.
67 struct encoding {
68 struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
69 struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
70 int oflags; /* flags for unicode -> multibyte conversion */
72 enum {
73 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
74 ei_##xxx ,
75 #include "encodings.def"
76 #ifdef USE_AIX
77 #include "encodings_aix.def"
78 #endif
79 #ifdef USE_OSF1
80 #include "encodings_osf1.def"
81 #endif
82 #ifdef USE_DOS
83 #include "encodings_dos.def"
84 #endif
85 #include "encodings_local.def"
86 #undef DEFENCODING
87 ei_for_broken_compilers_that_dont_like_trailing_commas
89 #include "flags.h"
90 static struct encoding const all_encodings[] = {
91 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
92 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
93 #include "encodings.def"
94 #ifdef USE_AIX
95 #include "encodings_aix.def"
96 #endif
97 #ifdef USE_OSF1
98 #include "encodings_osf1.def"
99 #endif
100 #ifdef USE_DOS
101 #include "encodings_dos.def"
102 #endif
103 #undef DEFENCODING
104 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
105 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
106 #include "encodings_local.def"
107 #undef DEFENCODING
/*
 * Conversion loops.
 */
113 #include "loops.h"
/*
 * Alias lookup function.
 * Defines
 *   struct alias { const char* name; unsigned int encoding_index; };
 *   const struct alias * aliases_lookup (const char *str, unsigned int len);
 *   #define MAX_WORD_LENGTH ...
 */
122 #include "aliases.h"
/*
 * System dependent alias lookup function.
 * Defines
 *   const struct alias * aliases2_lookup (const char *str);
 *
 * On systems with none of the USE_xxx macros defined, aliases2_lookup is
 * a macro that always yields NULL.
 */
#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) /* || ... */
static struct alias sysdep_aliases[] = {
#ifdef USE_AIX
#include "aliases_aix.h"
#endif
#ifdef USE_OSF1
#include "aliases_osf1.h"
#endif
#ifdef USE_DOS
#include "aliases_dos.h"
#endif
};
/*
 * Linear search of sysdep_aliases[] for an entry whose name is exactly
 * equal (strcmp) to str.  Returns the entry, or NULL if there is none.
 */
#ifdef __GNUC__
__inline
#endif
const struct alias *
aliases2_lookup (register const char *str)
{
  const struct alias * ptr;
  unsigned int count;
  for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--)
    if (!strcmp(str,ptr->name))
      return ptr;
  return NULL;
}
#else
#define aliases2_lookup(str)  NULL
#endif
#if 0
/* Like !strcasecmp, except that both strings can be assumed to be ASCII
   and the first string can be assumed to be in uppercase.
   Returns 1 if the strings are equal under that rule, 0 otherwise.
   This function is currently compiled out. */
static int strequal (const char* str1, const char* str2)
{
  unsigned char c1;
  unsigned char c2;
  for (;;) {
    c1 = * (unsigned char *) str1++;
    c2 = * (unsigned char *) str2++;
    if (c1 == 0)
      break;
    /* Only the second string is folded to upper case. */
    if (c2 >= 'a' && c2 <= 'z')
      c2 -= 'a'-'A';
    if (c1 != c2)
      break;
  }
  return (c1 == c2);
}
#endif
179 iconv_t iconv_open (const char* tocode, const char* fromcode)
181 struct conv_struct * cd;
182 char buf[MAX_WORD_LENGTH+10+1];
183 const char* cp;
184 char* bp;
185 const struct alias * ap;
186 unsigned int count;
187 unsigned int from_index;
188 int from_wchar;
189 unsigned int to_index;
190 int to_wchar;
191 int transliterate = 0;
193 /* Before calling aliases_lookup, convert the input string to upper case,
194 * and check whether it's entirely ASCII (we call gperf with option "-7"
195 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
196 * or if it's too long, it is not a valid encoding name.
198 for (to_wchar = 0;;) {
199 /* Search tocode in the table. */
200 for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
201 unsigned char c = * (unsigned char *) cp;
202 if (c >= 0x80)
203 goto invalid;
204 if (c >= 'a' && c <= 'z')
205 c -= 'a'-'A';
206 *bp = c;
207 if (c == '\0')
208 break;
209 if (--count == 0)
210 goto invalid;
212 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
213 bp -= 10;
214 *bp = '\0';
215 transliterate = 1;
217 if (buf[0] == '\0') {
218 tocode = locale_charset();
219 /* Avoid an endless loop that could occur when using an older version
220 of localcharset.c. */
221 if (tocode[0] == '\0')
222 goto invalid;
223 continue;
225 ap = aliases_lookup(buf,bp-buf);
226 if (ap == NULL) {
227 ap = aliases2_lookup(buf);
228 if (ap == NULL)
229 goto invalid;
231 if (ap->encoding_index == ei_local_char) {
232 tocode = locale_charset();
233 /* Avoid an endless loop that could occur when using an older version
234 of localcharset.c. */
235 if (tocode[0] == '\0')
236 goto invalid;
237 continue;
239 if (ap->encoding_index == ei_local_wchar_t) {
240 #if __STDC_ISO_10646__
241 if (sizeof(wchar_t) == 4) {
242 to_index = ei_ucs4internal;
243 break;
245 if (sizeof(wchar_t) == 2) {
246 to_index = ei_ucs2internal;
247 break;
249 if (sizeof(wchar_t) == 1) {
250 to_index = ei_iso8859_1;
251 break;
253 #endif
254 #if HAVE_MBRTOWC
255 to_wchar = 1;
256 tocode = locale_charset();
257 continue;
258 #endif
259 goto invalid;
261 to_index = ap->encoding_index;
262 break;
264 for (from_wchar = 0;;) {
265 /* Search fromcode in the table. */
266 for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
267 unsigned char c = * (unsigned char *) cp;
268 if (c >= 0x80)
269 goto invalid;
270 if (c >= 'a' && c <= 'z')
271 c -= 'a'-'A';
272 *bp = c;
273 if (c == '\0')
274 break;
275 if (--count == 0)
276 goto invalid;
278 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
279 bp -= 10;
280 *bp = '\0';
282 if (buf[0] == '\0') {
283 fromcode = locale_charset();
284 /* Avoid an endless loop that could occur when using an older version
285 of localcharset.c. */
286 if (fromcode[0] == '\0')
287 goto invalid;
288 continue;
290 ap = aliases_lookup(buf,bp-buf);
291 if (ap == NULL) {
292 ap = aliases2_lookup(buf);
293 if (ap == NULL)
294 goto invalid;
296 if (ap->encoding_index == ei_local_char) {
297 fromcode = locale_charset();
298 /* Avoid an endless loop that could occur when using an older version
299 of localcharset.c. */
300 if (fromcode[0] == '\0')
301 goto invalid;
302 continue;
304 if (ap->encoding_index == ei_local_wchar_t) {
305 #if __STDC_ISO_10646__
306 if (sizeof(wchar_t) == 4) {
307 from_index = ei_ucs4internal;
308 break;
310 if (sizeof(wchar_t) == 2) {
311 from_index = ei_ucs2internal;
312 break;
314 if (sizeof(wchar_t) == 1) {
315 from_index = ei_iso8859_1;
316 break;
318 #endif
319 #if HAVE_WCRTOMB
320 from_wchar = 1;
321 fromcode = locale_charset();
322 continue;
323 #endif
324 goto invalid;
326 from_index = ap->encoding_index;
327 break;
329 cd = (struct conv_struct *) malloc(from_wchar != to_wchar
330 ? sizeof(struct wchar_conv_struct)
331 : sizeof(struct conv_struct));
332 if (cd == NULL) {
333 errno = ENOMEM;
334 return (iconv_t)(-1);
336 cd->iindex = from_index;
337 cd->ifuncs = all_encodings[from_index].ifuncs;
338 cd->oindex = to_index;
339 cd->ofuncs = all_encodings[to_index].ofuncs;
340 cd->oflags = all_encodings[to_index].oflags;
341 /* Initialize the loop functions. */
342 #if HAVE_MBRTOWC
343 if (to_wchar) {
344 #if HAVE_WCRTOMB
345 if (from_wchar) {
346 cd->lfuncs.loop_convert = wchar_id_loop_convert;
347 cd->lfuncs.loop_reset = wchar_id_loop_reset;
348 } else
349 #endif
351 cd->lfuncs.loop_convert = wchar_to_loop_convert;
352 cd->lfuncs.loop_reset = wchar_to_loop_reset;
354 } else
355 #endif
357 #if HAVE_WCRTOMB
358 if (from_wchar) {
359 cd->lfuncs.loop_convert = wchar_from_loop_convert;
360 cd->lfuncs.loop_reset = wchar_from_loop_reset;
361 } else
362 #endif
364 cd->lfuncs.loop_convert = unicode_loop_convert;
365 cd->lfuncs.loop_reset = unicode_loop_reset;
368 /* Initialize the states. */
369 memset(&cd->istate,'\0',sizeof(state_t));
370 memset(&cd->ostate,'\0',sizeof(state_t));
371 /* Initialize the operation flags. */
372 cd->transliterate = transliterate;
373 /* Initialize additional fields. */
374 if (from_wchar != to_wchar) {
375 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) cd;
376 memset(&wcd->state,'\0',sizeof(mbstate_t));
378 /* Done. */
379 return (iconv_t)cd;
380 invalid:
381 errno = EINVAL;
382 return (iconv_t)(-1);
385 size_t iconv (iconv_t icd,
386 ICONV_CONST char* * inbuf, size_t *inbytesleft,
387 char* * outbuf, size_t *outbytesleft)
389 conv_t cd = (conv_t) icd;
390 if (inbuf == NULL || *inbuf == NULL)
391 return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
392 else
393 return cd->lfuncs.loop_convert(icd,
394 (const char* *)inbuf,inbytesleft,
395 outbuf,outbytesleft);
398 int iconv_close (iconv_t icd)
400 conv_t cd = (conv_t) icd;
401 free(cd);
402 return 0;
405 #ifndef LIBICONV_PLUG
407 int iconvctl (iconv_t icd, int request, void* argument)
409 conv_t cd = (conv_t) icd;
410 switch (request) {
411 case ICONV_TRIVIALP:
412 *(int *)argument =
413 ((cd->lfuncs.loop_convert == unicode_loop_convert
414 && cd->iindex == cd->oindex)
415 || cd->lfuncs.loop_convert == wchar_id_loop_convert
416 ? 1 : 0);
417 return 0;
418 case ICONV_GET_TRANSLITERATE:
419 *(int *)argument = cd->transliterate;
420 return 0;
421 case ICONV_SET_TRANSLITERATE:
422 cd->transliterate = (*(const int *)argument ? 1 : 0);
423 return 0;
424 default:
425 errno = EINVAL;
426 return -1;
430 static int compare_by_index (const void * arg1, const void * arg2)
432 const struct alias * alias1 = (const struct alias *) arg1;
433 const struct alias * alias2 = (const struct alias *) arg2;
434 return (int)alias1->encoding_index - (int)alias2->encoding_index;
/*
 * qsort() comparison function for an array of C string pointers.
 *
 * Returns 0 for equal strings.  Otherwise a name starting with "CS" sorts
 * after any name that does not; within the same class the strcmp() order
 * applies.
 */
static int compare_by_name (const void * arg1, const void * arg2)
{
  const char * name1 = *(const char **)arg1;
  const char * name2 = *(const char **)arg2;
  /* Compare alphabetically, but put "CS" names at the end. */
  int sign = strcmp(name1,name2);
  if (sign != 0) {
    /* The "CS" class difference is weighted by 4, so it always dominates
       the +1/-1 contributed by the alphabetical comparison. */
    sign = ((name1[0]=='C' && name1[1]=='S') - (name2[0]=='C' && name2[1]=='S'))
           * 4 + (sign >= 0 ? 1 : -1);
  }
  return sign;
}
450 void iconvlist (int (*do_one) (unsigned int namescount,
451 const char * const * names,
452 void* data),
453 void* data)
455 #define aliascount1 sizeof(aliases)/sizeof(aliases[0])
456 #ifndef aliases2_lookup
457 #define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
458 #else
459 #define aliascount2 0
460 #endif
461 #define aliascount (aliascount1+aliascount2)
462 struct alias aliasbuf[aliascount];
463 const char * namesbuf[aliascount];
464 size_t num_aliases;
466 /* Put all existing aliases into a buffer. */
467 size_t i;
468 size_t j;
469 j = 0;
470 for (i = 0; i < aliascount1; i++) {
471 const struct alias * p = &aliases[i];
472 if (p->name[0] != '\0'
473 && p->encoding_index != ei_local_char
474 && p->encoding_index != ei_local_wchar_t)
475 aliasbuf[j++] = *p;
477 #ifndef aliases2_lookup
478 for (i = 0; i < aliascount2; i++)
479 aliasbuf[j++] = sysdep_aliases[i];
480 #endif
481 num_aliases = j;
483 /* Sort by encoding_index. */
484 if (num_aliases > 1)
485 qsort(aliasbuf, num_aliases, sizeof(struct alias), compare_by_index);
487 /* Process all aliases with the same encoding_index together. */
488 size_t j;
489 j = 0;
490 while (j < num_aliases) {
491 unsigned int ei = aliasbuf[j].encoding_index;
492 size_t i = 0;
494 namesbuf[i++] = aliasbuf[j++].name;
495 while (j < num_aliases && aliasbuf[j].encoding_index == ei);
496 if (i > 1)
497 qsort(namesbuf, i, sizeof(const char *), compare_by_name);
498 /* Call the callback. */
499 if (do_one(i,namesbuf,data))
500 break;
503 #undef aliascount
504 #undef aliascount2
505 #undef aliascount1
508 int _libiconv_version = _LIBICONV_VERSION;
510 #endif