2 * Copyright (C) 1999-2008, 2011, 2016, 2018, 2020, 2022-2023 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either version 2.1
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <https://www.gnu.org/licenses/>.
26 #include "localcharset.h"
29 #include <cygwin/version.h>
34 * Consider all system dependent encodings, for any system,
35 * and the extra encodings.
44 * Consider those system dependent encodings that are needed for the
50 #if defined(__osf__) || defined(VMS)
53 #if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
56 /* Enable the EBCDIC encodings not only on z/OS but also on Linux/s390, for
57 easier interoperability between z/OS and Linux/s390. */
58 #if defined(__MVS__) || (defined(__linux__) && (defined(__s390__) || defined(__s390x__)))
64 * Data type for general conversion loop.
/* Conversion entry point of the general conversion loop: receives the
   descriptor, the input buffer pointer with its remaining byte count, and
   the output buffer pointer with its remaining byte count; yields a size_t. */
67 size_t (*loop_convert
) (iconv_t icd
,
68 const char* * inbuf
, size_t *inbytesleft
,
69 char* * outbuf
, size_t *outbytesleft
);
/* Reset entry point: same as above but without any input arguments. */
70 size_t (*loop_reset
) (iconv_t icd
,
71 char* * outbuf
, size_t *outbytesleft
);
77 #include "converters.h"
80 * Transliteration tables.
82 #include "cjk_variants.h"
86 * Table of all supported encodings.
/* Fields of one entry in the table of supported encodings: a converter set
   for each direction, plus flags that apply to the output direction. */
89 struct mbtowc_funcs ifuncs
; /* conversion multibyte -> unicode */
90 struct wctomb_funcs ofuncs
; /* conversion unicode -> multibyte */
91 int oflags
; /* flags for unicode -> multibyte conversion */
93 #define DEFALIAS(xxx_alias,xxx) /* nothing */
95 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
97 #include "encodings.def"
99 # include "encodings_aix.def"
102 # include "encodings_osf1.def"
105 # include "encodings_dos.def"
108 # include "encodings_zos.def"
111 # include "encodings_extra.def"
113 #include "encodings_local.def"
115 ei_for_broken_compilers_that_dont_like_trailing_commas
/* Table with one struct encoding initializer per encoding.  The rows are not
   written out here: each included .def file is expanded through DEFENCODING,
   which emits { ifuncs1, ifuncs2, ofuncs1, ofuncs2, oflags } for every
   encoding it lists. */
118 static struct encoding
const all_encodings
[] = {
119 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
120 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
121 #include "encodings.def"
123 # include "encodings_aix.def"
126 # include "encodings_osf1.def"
129 # include "encodings_dos.def"
132 # include "encodings_zos.def"
135 # include "encodings_extra.def"
/* DEFENCODING is redefined for encodings_local.def: those rows get an oflags
   value of 0 instead of the per-encoding ei_<name>_oflags constant. */
138 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
139 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
140 #include "encodings_local.def"
151 * Alias lookup function.
153 * struct alias { int name; unsigned int encoding_index; };
154 * const struct alias * aliases_lookup (const char *str, unsigned int len);
155 * #define MAX_WORD_LENGTH ...
158 # include "aliases_sysaix.h"
159 #elif defined hpux || defined __hpux
160 # include "aliases_syshpux.h"
161 #elif defined __osf__
162 # include "aliases_sysosf1.h"
164 # include "aliases_syssolaris.h"
166 # include "aliases.h"
170 * System dependent alias lookup function.
172 * const struct alias * aliases2_lookup (const char *str);
174 #if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_ZOS) || defined(USE_EXTRA) /* || ... */
/* aliases2.h is included three times, each time with a different definition
   of S(tag,name,encoding_index).
   First expansion: declare one char array member per alias, sized to hold
   the alias name including its terminating NUL (sizeof(name)). */
175 struct stringpool2_t
{
176 #define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
177 #include "aliases2.h"
/* Second expansion: initialise those members with the alias names. */
180 static const struct stringpool2_t stringpool2_contents
= {
181 #define S(tag,name,encoding_index) name,
182 #include "aliases2.h"
/* View of the pool as a flat character array, so that an entry can be
   addressed as stringpool2 + offset. */
185 #define stringpool2 ((const char *) &stringpool2_contents)
/* Third expansion: one struct alias per name.  The name field holds the
   offset of the corresponding member inside struct stringpool2_t (computed
   from a null base pointer), not a pointer to the string. */
186 static const struct alias sysdep_aliases
[] = {
187 #define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index },
188 #include "aliases2.h"
/* Looks str up among the system dependent aliases.
   Walks sysdep_aliases from first to last entry and compares str, with
   strcmp (so case-sensitively), against each entry's name, which is found at
   stringpool2 + ptr->name.
   NOTE(review): the return statements are not visible in this view;
   presumably the matching entry is returned, or NULL when none matches -
   confirm against the complete file. */
198 static const struct alias
*
199 aliases2_lookup (register const char *str
)
201 const struct alias
* ptr
;
/* count starts at the number of table entries and counts down to 0. */
203 for (ptr
= sysdep_aliases
, count
= sizeof(sysdep_aliases
)/sizeof(sysdep_aliases
[0]); count
> 0; ptr
++, count
--)
204 if (!strcmp(str
, stringpool2
+ ptr
->name
))
209 #define aliases2_lookup(str) NULL
210 #define stringpool2 NULL
/* Like !strcasecmp, except that both strings can be assumed to be ASCII
   and the first string can be assumed to be in uppercase.
   Returns 1 if str1 equals str2 once the ASCII lowercase letters of str2
   have been folded to uppercase, and 0 otherwise. */
static int strequal (const char* str1, const char* str2)
{
  unsigned char c1;
  unsigned char c2;
  for (;;) {
    /* Read as unsigned char so that the range test below is well defined
       regardless of the signedness of plain char. */
    c1 = * (const unsigned char *) str1++;
    c2 = * (const unsigned char *) str2++;
    if (c1 == 0)
      break;
    /* Only str2 needs folding; str1 is uppercase by contract. */
    if (c2 >= 'a' && c2 <= 'z')
      c2 -= 'a'-'A';
    if (c1 != c2)
      break;
  }
  /* Equal only if both strings ended at the same position. */
  return (c1 == c2);
}
/* Allocates a conversion descriptor for converting from fromcode to tocode.
   The parsing of the two names and the initialisation of the descriptor are
   not written here: they come from the textually included iconv_open1.h and
   iconv_open2.h, which use the local variables declared below.
   Two failure exits returning (iconv_t)(-1) are visible.
   NOTE(review): the conditions guarding those exits (presumably allocation
   failure and an invalid encoding name) and any errno assignment are not
   visible in this view - confirm against the complete file. */
234 iconv_t
iconv_open (const char* tocode
, const char* fromcode
)
236 struct conv_struct
* cd
;
237 unsigned int from_index
;
239 unsigned int from_surface
;
240 unsigned int to_index
;
242 unsigned int to_surface
;
246 #include "iconv_open1.h"
/* The larger struct wchar_conv_struct is allocated only when from_wchar and
   to_wchar differ; otherwise a plain struct conv_struct suffices. */
248 cd
= (struct conv_struct
*) malloc(from_wchar
!= to_wchar
249 ? sizeof(struct wchar_conv_struct
)
250 : sizeof(struct conv_struct
));
253 return (iconv_t
)(-1);
256 #include "iconv_open2.h"
261 return (iconv_t
)(-1);
264 size_t iconv (iconv_t icd
,
265 ICONV_CONST
char* * inbuf
, size_t *inbytesleft
,
266 char* * outbuf
, size_t *outbytesleft
)
268 conv_t cd
= (conv_t
) icd
;
269 if (inbuf
== NULL
|| *inbuf
== NULL
)
270 return cd
->lfuncs
.loop_reset(icd
,outbuf
,outbytesleft
);
272 return cd
->lfuncs
.loop_convert(icd
,
273 (const char* *)inbuf
,inbytesleft
,
274 outbuf
,outbytesleft
);
/* Closes the conversion descriptor icd.
   Only the conversion of the opaque handle to conv_t is visible here.
   NOTE(review): the statements that release cd and produce the return value
   are missing from this view - confirm against the complete file. */
277 int iconv_close (iconv_t icd
)
279 conv_t cd
= (conv_t
) icd
;
284 #ifndef LIBICONV_PLUG
287 * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
288 * fit in an iconv_allocation_t.
289 * If this verification fails, iconv_allocation_t must be made larger and
290 * the major version in LIBICONV_VERSION_INFO must be bumped.
291 * Currently 'struct conv_struct' has 23 integer/pointer fields, and
292 * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
/* Compile-time size checks.  The array length evaluates to 1 when the struct
   fits into iconv_allocation_t and to -1 when it does not; a negative array
   length is invalid, so an oversized struct makes compilation fail. */
294 typedef int verify_size_1
[2 * (sizeof (struct conv_struct
) <= sizeof (iconv_allocation_t
)) - 1];
295 typedef int verify_size_2
[2 * (sizeof (struct wchar_conv_struct
) <= sizeof (iconv_allocation_t
)) - 1];
/* Variant of iconv_open that builds the descriptor in storage supplied by
   the caller: cd is set to resultp instead of to a malloc'ed block.
   It declares the same local variables and includes the same iconv_open1.h
   and iconv_open2.h fragments as iconv_open.
   NOTE(review): the return statements are not visible in this view;
   the int return type suggests a status code - confirm. */
297 int iconv_open_into (const char* tocode
, const char* fromcode
,
298 iconv_allocation_t
* resultp
)
300 struct conv_struct
* cd
;
301 unsigned int from_index
;
303 unsigned int from_surface
;
304 unsigned int to_index
;
306 unsigned int to_surface
;
310 #include "iconv_open1.h"
/* The verify_size typedefs in this file guarantee that either descriptor
   struct fits into an iconv_allocation_t. */
312 cd
= (struct conv_struct
*) resultp
;
314 #include "iconv_open2.h"
322 /* Bit mask of all valid surfaces. */
323 #define ALL_SURFACES (ICONV_SURFACE_EBCDIC_ZOS_UNIX)
/* Queries or changes settings of the conversion descriptor icd.
   request selects the operation; argument points to the value that is read
   (SET requests) or written (GET requests), and its pointed-to type depends
   on the request: int for the transliterate and discard_ilseq flags,
   unsigned int for the surfaces, struct iconv_hooks and
   struct iconv_fallbacks for the hook and fallback requests.
   NOTE(review): the switch statement itself, the return statements and the
   handling of unknown requests are not visible in this view - confirm
   against the complete file. */
325 int iconvctl (iconv_t icd
, int request
, void* argument
)
327 conv_t cd
= (conv_t
) icd
;
/* Condition that holds when the descriptor uses unicode_loop_convert with
   identical input and output encoding index and surface, or when it uses
   wchar_id_loop_convert.  NOTE(review): the request label and the
   assignment target for this expression are not visible - confirm. */
331 ((cd
->lfuncs
.loop_convert
== unicode_loop_convert
332 && cd
->iindex
== cd
->oindex
333 && cd
->isurface
== cd
->osurface
)
334 || cd
->lfuncs
.loop_convert
== wchar_id_loop_convert
337 case ICONV_GET_TRANSLITERATE
:
338 *(int *)argument
= cd
->transliterate
;
/* The stored flag is normalised to 0 or 1. */
340 case ICONV_SET_TRANSLITERATE
:
341 cd
->transliterate
= (*(const int *)argument
? 1 : 0);
343 case ICONV_GET_DISCARD_ILSEQ
:
344 *(int *)argument
= cd
->discard_ilseq
;
/* The stored flag is normalised to 0 or 1. */
346 case ICONV_SET_DISCARD_ILSEQ
:
347 cd
->discard_ilseq
= (*(const int *)argument
? 1 : 0);
/* A non-NULL argument is copied into the descriptor as a whole struct.
   The assignments after the copy clear every hook field; the keyword
   separating the two branches is not visible here (presumably else). */
349 case ICONV_SET_HOOKS
:
350 if (argument
!= NULL
) {
351 cd
->hooks
= *(const struct iconv_hooks
*)argument
;
353 cd
->hooks
.uc_hook
= NULL
;
354 cd
->hooks
.wc_hook
= NULL
;
355 cd
->hooks
.data
= NULL
;
/* Same pattern as for the hooks: copy the struct when argument is non-NULL,
   with a set of assignments that clear every fallback field. */
358 case ICONV_SET_FALLBACKS
:
359 if (argument
!= NULL
) {
360 cd
->fallbacks
= *(const struct iconv_fallbacks
*)argument
;
362 cd
->fallbacks
.mb_to_uc_fallback
= NULL
;
363 cd
->fallbacks
.uc_to_mb_fallback
= NULL
;
364 cd
->fallbacks
.mb_to_wc_fallback
= NULL
;
365 cd
->fallbacks
.wc_to_mb_fallback
= NULL
;
366 cd
->fallbacks
.data
= NULL
;
369 case ICONV_GET_FROM_SURFACE
:
370 *(unsigned int *)argument
= cd
->isurface
;
/* The new surface is stored only if it has no bits outside ALL_SURFACES. */
372 case ICONV_SET_FROM_SURFACE
:
373 if ((*(const unsigned int *)argument
& ~ALL_SURFACES
) == 0) {
374 cd
->isurface
= *(const unsigned int *)argument
;
380 case ICONV_GET_TO_SURFACE
:
381 *(unsigned int *)argument
= cd
->osurface
;
/* The new surface is stored only if it has no bits outside ALL_SURFACES. */
383 case ICONV_SET_TO_SURFACE
:
384 if ((*(const unsigned int *)argument
& ~ALL_SURFACES
) == 0) {
385 cd
->osurface
= *(const unsigned int *)argument
;
/* An alias after its name has been converted from 'int' to 'const char*'. */
struct nalias { const char* name; unsigned int encoding_index; };

/* qsort comparator that orders struct nalias elements by ascending
   encoding_index.
   arg1, arg2 - pointers to the two struct nalias elements to compare.
   Returns a negative value, zero or a positive value when the first index
   is smaller than, equal to or larger than the second. */
static int compare_by_index (const void * arg1, const void * arg2)
{
  const struct nalias * alias1 = (const struct nalias *) arg1;
  const struct nalias * alias2 = (const struct nalias *) arg2;
  /* Compare rather than subtract: a difference of values cast to int gives
     the wrong sign, or overflows, once an index exceeds INT_MAX. */
  return (alias1->encoding_index > alias2->encoding_index)
         - (alias1->encoding_index < alias2->encoding_index);
}
/* qsort comparator for an array of 'const char *' names.
   arg1, arg2 - pointers to the two array elements to compare.
   Returns zero for identical names; otherwise a negative or positive value
   such that names starting with "CS" sort after all other names, and names
   within each of the two groups sort in strcmp order. */
static int compare_by_name (const void * arg1, const void * arg2)
{
  const char * name1 = *(const char * const *)arg1;
  const char * name2 = *(const char * const *)arg2;
  /* Compare alphabetically, but put "CS" names at the end. */
  int sign = strcmp(name1,name2);
  if (sign != 0) {
    /* The "CS" difference is weighted by 4 so that it always outweighs the
       +1 or -1 contributed by the alphabetical order. */
    sign = ((name1[0]=='C' && name1[1]=='S') - (name2[0]=='C' && name2[1]=='S'))
           * 4 + (sign >= 0 ? 1 : -1);
  }
  return sign;
}
/* Reports the known encoding names to the callback do_one, one call per
   encoding.
   Visible steps: copy the entries of aliases (skipping ei_local_char and
   ei_local_wchar_t) and, when available, of sysdep_aliases into aliasbuf
   with their names resolved to strings; sort aliasbuf by encoding_index;
   then, for every run of entries sharing an encoding_index, collect the
   names into namesbuf, sort them with compare_by_name and pass the count
   and the names to do_one together with data.
   NOTE(review): the end of the parameter list and the action taken when
   do_one returns non-zero are not visible in this view; presumably the
   enumeration stops - confirm against the complete file. */
420 void iconvlist (int (*do_one
) (unsigned int namescount
,
421 const char * const * names
,
/* aliascount is an upper bound on the number of entries, used to size both
   buffers; aliascount2 is 0 when aliases2_lookup is a macro, i.e. when there
   is no sysdep_aliases table. */
425 #define aliascount1 sizeof(aliases)/sizeof(aliases[0])
426 #ifndef aliases2_lookup
427 #define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
429 #define aliascount2 0
431 #define aliascount (aliascount1+aliascount2)
432 struct nalias aliasbuf
[aliascount
];
433 const char * namesbuf
[aliascount
];
436 /* Put all existing aliases into a buffer. */
440 for (i
= 0; i
< aliascount1
; i
++) {
441 const struct alias
* p
= &aliases
[i
];
443 && p
->encoding_index
!= ei_local_char
444 && p
->encoding_index
!= ei_local_wchar_t
) {
/* p->name is an offset into stringpool; store the resolved string. */
445 aliasbuf
[j
].name
= stringpool
+ p
->name
;
446 aliasbuf
[j
].encoding_index
= p
->encoding_index
;
450 #ifndef aliases2_lookup
451 for (i
= 0; i
< aliascount2
; i
++) {
452 aliasbuf
[j
].name
= stringpool2
+ sysdep_aliases
[i
].name
;
453 aliasbuf
[j
].encoding_index
= sysdep_aliases
[i
].encoding_index
;
459 /* Sort by encoding_index. */
461 qsort(aliasbuf
, num_aliases
, sizeof(struct nalias
), compare_by_index
);
463 /* Process all aliases with the same encoding_index together. */
466 while (j
< num_aliases
) {
467 unsigned int ei
= aliasbuf
[j
].encoding_index
;
/* Gather the names of the current run; i counts the names gathered. */
470 namesbuf
[i
++] = aliasbuf
[j
++].name
;
471 while (j
< num_aliases
&& aliasbuf
[j
].encoding_index
== ei
);
473 qsort(namesbuf
, i
, sizeof(const char *), compare_by_name
);
474 /* Call the callback. */
475 if (do_one(i
,namesbuf
,data
))
485 * Table of canonical names of encodings.
486 * Instead of strings, it contains offsets into stringpool and stringpool2.
/* The initializer is assembled entirely from generated canonical*.h
   headers.  Each group below offers platform specific variants of one
   header; the conditionals visible here select on hpux / __hpux and
   __osf__, the remaining selectors are not visible in this view.
   The element type unsigned short limits every stored offset to the
   range of that type. */
488 static const unsigned short all_canonical
[] = {
490 # include "canonical_sysaix.h"
491 #elif defined hpux || defined __hpux
492 # include "canonical_syshpux.h"
493 #elif defined __osf__
494 # include "canonical_sysosf1.h"
496 # include "canonical_syssolaris.h"
498 # include "canonical.h"
502 # include "canonical_aix_sysaix.h"
504 # include "canonical_aix.h"
509 # include "canonical_osf1_sysosf1.h"
511 # include "canonical_osf1.h"
515 # include "canonical_dos.h"
518 # include "canonical_zos.h"
521 # include "canonical_extra.h"
524 # include "canonical_local_sysaix.h"
525 #elif defined hpux || defined __hpux
526 # include "canonical_local_syshpux.h"
527 #elif defined __osf__
528 # include "canonical_local_sysosf1.h"
530 # include "canonical_local_syssolaris.h"
532 # include "canonical_local.h"
/* Maps the encoding name given in name to its canonical name.
   Visible steps: the name is copied into buf, a buffer of
   MAX_WORD_LENGTH+10+1 bytes, while being checked character by character;
   a trailing "//TRANSLIT" or "//IGNORE" is recognised; an empty name, or a
   name that resolves to ei_local_char, is replaced by the result of
   locale_charset() and the search is repeated; the name is looked up with
   aliases_lookup and with aliases2_lookup; a result of ei_local_wchar_t is
   replaced by a UCS-4, UCS-2 or ISO-8859-1 index chosen from
   sizeof(wchar_t) on platforms where wchar_t is known to be Unicode.
   The result is the string at offset all_canonical[index] within pool.
   NOTE(review): the declarations of code, cp, bp, count, index and pool,
   the handling of invalid names and the treatment of the recognised
   suffixes are not visible in this view - confirm against the complete
   file. */
536 const char * iconv_canonicalize (const char * name
)
539 char buf
[MAX_WORD_LENGTH
+10+1];
542 const struct alias
* ap
;
547 /* Before calling aliases_lookup, convert the input string to upper case,
548 * and check whether it's entirely ASCII (we call gperf with option "-7"
549 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
550 * or if it's too long, it is not a valid encoding name.
552 for (code
= name
;;) {
553 /* Search code in the table. */
554 for (cp
= code
, bp
= buf
, count
= MAX_WORD_LENGTH
+10+1; ; cp
++, bp
++) {
555 unsigned char c
= (unsigned char) *cp
;
558 if (c
>= 'a' && c
<= 'z')
/* Suffix tests: the length check comes first so that the memcmp never reads
   before the start of buf. */
567 if (bp
-buf
>= 10 && memcmp(bp
-10,"//TRANSLIT",10)==0) {
572 if (bp
-buf
>= 8 && memcmp(bp
-8,"//IGNORE",8)==0) {
579 if (buf
[0] == '\0') {
580 code
= locale_charset();
581 /* Avoid an endless loop that could occur when using an older version
582 of localcharset.c. */
/* aliases_lookup receives the length of the copied name; aliases2_lookup
   takes the NUL-terminated string only. */
588 ap
= aliases_lookup(buf
,bp
-buf
);
591 ap
= aliases2_lookup(buf
);
595 if (ap
->encoding_index
== ei_local_char
) {
596 code
= locale_charset();
597 /* Avoid an endless loop that could occur when using an older version
598 of localcharset.c. */
603 if (ap
->encoding_index
== ei_local_wchar_t
) {
604 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
605 This is also the case on native Woe32 systems and Cygwin >= 1.7, where
606 we know that it is UTF-16. */
607 #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
608 if (sizeof(wchar_t) == 4) {
609 index
= ei_ucs4internal
;
612 if (sizeof(wchar_t) == 2) {
613 # if WORDS_LITTLEENDIAN
620 #elif __STDC_ISO_10646__
621 if (sizeof(wchar_t) == 4) {
622 index
= ei_ucs4internal
;
625 if (sizeof(wchar_t) == 2) {
626 index
= ei_ucs2internal
;
629 if (sizeof(wchar_t) == 1) {
630 index
= ei_iso8859_1
;
635 index
= ap
->encoding_index
;
/* all_canonical holds offsets, so the name is the pool base plus offset. */
638 return all_canonical
[index
] + pool
;
/* File-scope, non-static int initialised from the _LIBICONV_VERSION
   constant. */
643 int _libiconv_version
= _LIBICONV_VERSION
;
645 #if defined __FreeBSD__ && !defined __gnu_freebsd__
646 /* GNU libiconv is the native FreeBSD iconv implementation since 2002.
647 It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'. */
/* strong_alias(name, aliasname) declares aliasname as an extern symbol with
   the same type as name (__typeof) that is an alias of name (alias
   attribute).  The forwarding through _strong_alias is presumably there so
   that macro arguments are expanded before #name stringifies them. */
648 #define strong_alias(name, aliasname) _strong_alias(name, aliasname)
649 #define _strong_alias(name, aliasname) \
650 extern __typeof (name) aliasname __attribute__ ((alias (#name)));
/* Make the three libiconv entry points available under the plain iconv
   names as well. */
654 strong_alias (libiconv_open
, iconv_open
)
655 strong_alias (libiconv
, iconv
)
656 strong_alias (libiconv_close
, iconv_close
)