Update the testsuite for changed GB18030 converter.
[libiconv.git] / lib / iconv.c
blobf03edc290f36451146d0c44ab37f9560a8f0ab12
/*
 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
 * Suite 330, Boston, MA 02111-1307, USA.
 */
#include <iconv.h>

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
#include "libcharset.h"
/*
 * Consider those system dependent encodings that are needed for the
 * current system.
 *
 * Each USE_xxx macro defined here enables the matching platform-specific
 * encoding and alias includes further down in this file.
 */
#ifdef _AIX
#define USE_AIX
#endif
#ifdef __osf__
#define USE_OSF1
#endif
#ifdef __DJGPP__
#define USE_DOS
#endif
/*
 * Data type for general conversion loop.
 *
 * loop_convert performs a conversion step on the given input and output
 * buffers; loop_reset is the entry point used when there is no input
 * (see iconv(), which dispatches to one or the other).
 */
struct loop_funcs {
  size_t (*loop_convert) (iconv_t icd,
                          const char* * inbuf, size_t *inbytesleft,
                          char* * outbuf, size_t *outbytesleft);
  size_t (*loop_reset) (iconv_t icd,
                        char* * outbuf, size_t *outbytesleft);
};
54 * Converters.
56 #include "converters.h"
59 * Transliteration tables.
61 #include "cjk_variants.h"
62 #include "translit.h"
65 * Table of all supported encodings.
67 struct encoding {
68 struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
69 struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
70 int oflags; /* flags for unicode -> multibyte conversion */
72 enum {
73 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) \
74 ei_##xxx ,
75 #include "encodings.def"
76 #ifdef USE_AIX
77 #include "encodings_aix.def"
78 #endif
79 #ifdef USE_OSF1
80 #include "encodings_osf1.def"
81 #endif
82 #ifdef USE_DOS
83 #include "encodings_dos.def"
84 #endif
85 #include "encodings_local.def"
86 #undef DEFENCODING
87 ei_for_broken_compilers_that_dont_like_trailing_commas
89 #include "flags.h"
90 static struct encoding const all_encodings[] = {
91 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) \
92 { xxx_ifuncs, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
93 #include "encodings.def"
94 #ifdef USE_AIX
95 #include "encodings_aix.def"
96 #endif
97 #ifdef USE_OSF1
98 #include "encodings_osf1.def"
99 #endif
100 #ifdef USE_DOS
101 #include "encodings_dos.def"
102 #endif
103 #undef DEFENCODING
104 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) \
105 { xxx_ifuncs, xxx_ofuncs1,xxx_ofuncs2, 0 },
106 #include "encodings_local.def"
107 #undef DEFENCODING
111 * Conversion loops.
113 #include "loops.h"
116 * Alias lookup function.
117 * Defines
118 * struct alias { const char* name; unsigned int encoding_index; };
119 * const struct alias * aliases_lookup (const char *str, unsigned int len);
120 * #define MAX_WORD_LENGTH ...
122 #include "aliases.h"
/*
 * System dependent alias lookup function.
 * Defines
 *   const struct alias * aliases2_lookup (const char *str);
 *
 * On systems with platform-specific encodings this is a linear search
 * through a small table using an exact strcmp match; it returns the
 * matching entry or NULL.  On all other systems it is a macro that always
 * yields NULL.
 */
#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) /* || ... */
/* The table is only ever read, so keep it const. */
static const struct alias sysdep_aliases[] = {
#ifdef USE_AIX
#include "aliases_aix.h"
#endif
#ifdef USE_OSF1
#include "aliases_osf1.h"
#endif
#ifdef USE_DOS
#include "aliases_dos.h"
#endif
};
#ifdef __GNUC__
__inline
#endif
const struct alias *
aliases2_lookup (register const char *str)
{
  const struct alias * ptr;
  unsigned int count;
  for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--) {
    if (!strcmp(str,ptr->name))
      return ptr;
  }
  return NULL;
}
#else
#define aliases2_lookup(str)  NULL
#endif
#if 0
/* Like !strcasecmp, except that the both strings can be assumed to be ASCII
   and the first string can be assumed to be in uppercase.
   Returns 1 if the strings are equal, 0 otherwise.
   This function is currently compiled out. */
static int strequal (const char* str1, const char* str2)
{
  unsigned char c1;
  unsigned char c2;
  for (;;) {
    c1 = * (const unsigned char *) str1++;
    c2 = * (const unsigned char *) str2++;
    /* At the end of str1 the strings are equal only if str2 ends too. */
    if (c1 == 0)
      break;
    /* Only str2 needs case folding; str1 is already uppercase. */
    if (c2 >= 'a' && c2 <= 'z')
      c2 -= 'a'-'A';
    if (c1 != c2)
      break;
  }
  return (c1 == c2);
}
#endif
179 iconv_t iconv_open (const char* tocode, const char* fromcode)
181 struct conv_struct * cd;
182 char buf[MAX_WORD_LENGTH+10+1];
183 const char* cp;
184 char* bp;
185 const struct alias * ap;
186 unsigned int count;
187 unsigned int from_index;
188 int from_wchar;
189 unsigned int to_index;
190 int to_wchar;
191 int transliterate = 0;
193 /* Before calling aliases_lookup, convert the input string to upper case,
194 * and check whether it's entirely ASCII (we call gperf with option "-7"
195 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
196 * or if it's too long, it is not a valid encoding name.
198 for (to_wchar = 0;;) {
199 /* Search tocode in the table. */
200 for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
201 unsigned char c = * (unsigned char *) cp;
202 if (c >= 0x80)
203 goto invalid;
204 if (c >= 'a' && c <= 'z')
205 c -= 'a'-'A';
206 *bp = c;
207 if (c == '\0')
208 break;
209 if (--count == 0)
210 goto invalid;
212 if (bp-buf > 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
213 bp -= 10;
214 *bp = '\0';
215 transliterate = 1;
217 ap = aliases_lookup(buf,bp-buf);
218 if (ap == NULL) {
219 ap = aliases2_lookup(buf);
220 if (ap == NULL)
221 goto invalid;
223 if (ap->encoding_index == ei_local_char) {
224 tocode = locale_charset();
225 continue;
227 if (ap->encoding_index == ei_local_wchar_t) {
228 #if __STDC_ISO_10646__
229 if (sizeof(wchar_t) == 4) {
230 to_index = ei_ucs4internal;
231 break;
233 if (sizeof(wchar_t) == 2) {
234 to_index = ei_ucs2internal;
235 break;
237 if (sizeof(wchar_t) == 1) {
238 to_index = ei_iso8859_1;
239 break;
241 #endif
242 #if HAVE_MBRTOWC
243 to_wchar = 1;
244 tocode = locale_charset();
245 continue;
246 #endif
247 goto invalid;
249 to_index = ap->encoding_index;
250 break;
252 for (from_wchar = 0;;) {
253 /* Search fromcode in the table. */
254 for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
255 unsigned char c = * (unsigned char *) cp;
256 if (c >= 0x80)
257 goto invalid;
258 if (c >= 'a' && c <= 'z')
259 c -= 'a'-'A';
260 *bp = c;
261 if (c == '\0')
262 break;
263 if (--count == 0)
264 goto invalid;
266 if (bp-buf > 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
267 bp -= 10;
268 *bp = '\0';
270 ap = aliases_lookup(buf,bp-buf);
271 if (ap == NULL) {
272 ap = aliases2_lookup(buf);
273 if (ap == NULL)
274 goto invalid;
276 if (ap->encoding_index == ei_local_char) {
277 fromcode = locale_charset();
278 continue;
280 if (ap->encoding_index == ei_local_wchar_t) {
281 #if __STDC_ISO_10646__
282 if (sizeof(wchar_t) == 4) {
283 from_index = ei_ucs4internal;
284 break;
286 if (sizeof(wchar_t) == 2) {
287 from_index = ei_ucs2internal;
288 break;
290 if (sizeof(wchar_t) == 1) {
291 from_index = ei_iso8859_1;
292 break;
294 #endif
295 #if HAVE_WCRTOMB
296 from_wchar = 1;
297 fromcode = locale_charset();
298 continue;
299 #endif
300 goto invalid;
302 from_index = ap->encoding_index;
303 break;
305 cd = (struct conv_struct *) malloc(from_wchar != to_wchar
306 ? sizeof(struct wchar_conv_struct)
307 : sizeof(struct conv_struct));
308 if (cd == NULL) {
309 errno = ENOMEM;
310 return (iconv_t)(-1);
312 cd->iindex = from_index;
313 cd->ifuncs = all_encodings[from_index].ifuncs;
314 cd->oindex = to_index;
315 cd->ofuncs = all_encodings[to_index].ofuncs;
316 cd->oflags = all_encodings[to_index].oflags;
317 /* Initialize the loop functions. */
318 #if HAVE_MBRTOWC
319 if (to_wchar) {
320 #if HAVE_WCRTOMB
321 if (from_wchar) {
322 cd->lfuncs.loop_convert = wchar_id_loop_convert;
323 cd->lfuncs.loop_reset = wchar_id_loop_reset;
324 } else
325 #endif
327 cd->lfuncs.loop_convert = wchar_to_loop_convert;
328 cd->lfuncs.loop_reset = wchar_to_loop_reset;
330 } else
331 #endif
333 #if HAVE_WCRTOMB
334 if (from_wchar) {
335 cd->lfuncs.loop_convert = wchar_from_loop_convert;
336 cd->lfuncs.loop_reset = wchar_from_loop_reset;
337 } else
338 #endif
340 cd->lfuncs.loop_convert = unicode_loop_convert;
341 cd->lfuncs.loop_reset = unicode_loop_reset;
344 /* Initialize the states. */
345 memset(&cd->istate,'\0',sizeof(state_t));
346 memset(&cd->ostate,'\0',sizeof(state_t));
347 /* Initialize the operation flags. */
348 cd->transliterate = transliterate;
349 /* Initialize additional fields. */
350 if (from_wchar != to_wchar) {
351 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) cd;
352 memset(&wcd->state,'\0',sizeof(mbstate_t));
354 /* Done. */
355 return (iconv_t)cd;
356 invalid:
357 errno = EINVAL;
358 return (iconv_t)(-1);
361 size_t iconv (iconv_t icd,
362 ICONV_CONST char* * inbuf, size_t *inbytesleft,
363 char* * outbuf, size_t *outbytesleft)
365 conv_t cd = (conv_t) icd;
366 if (inbuf == NULL || *inbuf == NULL)
367 return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
368 else
369 return cd->lfuncs.loop_convert(icd,
370 (const char* *)inbuf,inbytesleft,
371 outbuf,outbytesleft);
/*
 * Releases a conversion descriptor obtained from iconv_open.
 *
 * Returns 0 on success.  Returns -1 with errno set to EBADF if icd is the
 * failure value (iconv_t)(-1), which must not be passed to free().
 */
int iconv_close (iconv_t icd)
{
  if (icd == (iconv_t)(-1)) {
    errno = EBADF;
    return -1;
  }
  /* The descriptor is a single malloc'ed block (see iconv_open). */
  free((void *) icd);
  return 0;
}
381 #ifndef LIBICONV_PLUG
383 int iconvctl (iconv_t icd, int request, void* argument)
385 conv_t cd = (conv_t) icd;
386 switch (request) {
387 case ICONV_TRIVIALP:
388 *(int *)argument =
389 ((cd->lfuncs.loop_convert == unicode_loop_convert
390 && cd->iindex == cd->oindex)
391 || cd->lfuncs.loop_convert == wchar_id_loop_convert
392 ? 1 : 0);
393 return 0;
394 case ICONV_GET_TRANSLITERATE:
395 *(int *)argument = cd->transliterate;
396 return 0;
397 case ICONV_SET_TRANSLITERATE:
398 cd->transliterate = (*(const int *)argument ? 1 : 0);
399 return 0;
400 default:
401 errno = EINVAL;
402 return -1;
406 int _libiconv_version = _LIBICONV_VERSION;
408 #endif