2 * Copyright (C) 1999-2008, 2011, 2018, 2020, 2023 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either version 2.1
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <https://www.gnu.org/licenses/>.
20 /* Part 1 of iconv_open.
21 Input: const char* tocode, const char* fromcode.
23 unsigned int from_index;
25 unsigned int from_surface;
26 unsigned int to_index;
28 unsigned int to_surface;
31 Jumps to 'invalid' in case of error.
34 char buf
[MAX_WORD_LENGTH
+9+9+1];
37 const struct alias
* ap
;
40 from_surface
= ICONV_SURFACE_NONE
;
41 to_surface
= ICONV_SURFACE_NONE
;
45 /* Before calling aliases_lookup, convert the input string to upper case,
46 * and check whether it's entirely ASCII (we call gperf with option "-7"
47 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
48 * or if it's too long, it is not a valid encoding name.
50 for (to_wchar
= 0;;) {
51 /* Search tocode in the table. */
52 for (cp
= tocode
, bp
= buf
, count
= MAX_WORD_LENGTH
+9+9+1; ; cp
++, bp
++) {
53 unsigned char c
= (unsigned char) *cp
;
56 if (c
>= 'a' && c
<= 'z')
66 int parsed_translit
= 0;
67 int parsed_ignore
= 0;
68 if (sp
-buf
> 9 && memcmp(sp
-9,"/TRANSLIT",9)==0) {
71 } else if (sp
-buf
> 7 && memcmp(sp
-7,"/IGNORE",7)==0) {
75 if (sp
> buf
&& memcmp(sp
-1,"/",1) == 0) {
77 } else if (sp
-buf
>= 9 && memcmp(sp
-9,"/ZOS_UNIX",9)==0) {
79 to_surface
= ICONV_SURFACE_EBCDIC_ZOS_UNIX
;
90 tocode
= locale_charset();
91 /* Avoid an endless loop that could occur when using an older version of localcharset.c. */
93 if (tocode
[0] == '\0')
97 ap
= aliases_lookup(buf
,bp
-buf
);
99 ap
= aliases2_lookup(buf
);
103 if (ap
->encoding_index
== ei_local_char
) {
104 tocode
= locale_charset();
105 /* Avoid an endless loop that could occur when using an older version
106 of localcharset.c. */
107 if (tocode
[0] == '\0')
111 if (ap
->encoding_index
== ei_local_wchar_t
) {
112 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
113 This is also the case on native Woe32 systems and Cygwin >= 1.7, where
114 we know that it is UTF-16. */
115 #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
116 if (sizeof(wchar_t) == 4) {
117 to_index
= ei_ucs4internal
;
120 if (sizeof(wchar_t) == 2) {
121 # if WORDS_LITTLEENDIAN
122 to_index
= ei_utf16le
;
124 to_index
= ei_utf16be
;
128 #elif __STDC_ISO_10646__
129 if (sizeof(wchar_t) == 4) {
130 to_index
= ei_ucs4internal
;
133 if (sizeof(wchar_t) == 2) {
134 to_index
= ei_ucs2internal
;
137 if (sizeof(wchar_t) == 1) {
138 to_index
= ei_iso8859_1
;
144 tocode
= locale_charset();
149 to_index
= ap
->encoding_index
;
152 for (from_wchar
= 0;;) {
153 /* Search fromcode in the table. */
154 for (cp
= fromcode
, bp
= buf
, count
= MAX_WORD_LENGTH
+9+9+1; ; cp
++, bp
++) {
155 unsigned char c
= (unsigned char) *cp
;
158 if (c
>= 'a' && c
<= 'z')
168 int parsed_translit
= 0;
169 int parsed_ignore
= 0;
170 if (sp
-buf
> 9 && memcmp(sp
-9,"/TRANSLIT",9)==0) {
173 } else if (sp
-buf
> 7 && memcmp(sp
-7,"/IGNORE",7)==0) {
177 if (sp
> buf
&& memcmp(sp
-1,"/",1) == 0) {
179 } else if (sp
-buf
>= 9 && memcmp(sp
-9,"/ZOS_UNIX",9)==0) {
181 from_surface
= ICONV_SURFACE_EBCDIC_ZOS_UNIX
;
191 if (buf
[0] == '\0') {
192 fromcode
= locale_charset();
193 /* Avoid an endless loop that could occur when using an older version
194 of localcharset.c. */
195 if (fromcode
[0] == '\0')
199 ap
= aliases_lookup(buf
,bp
-buf
);
201 ap
= aliases2_lookup(buf
);
205 if (ap
->encoding_index
== ei_local_char
) {
206 fromcode
= locale_charset();
207 /* Avoid an endless loop that could occur when using an older version
208 of localcharset.c. */
209 if (fromcode
[0] == '\0')
213 if (ap
->encoding_index
== ei_local_wchar_t
) {
214 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
215 This is also the case on native Woe32 systems and Cygwin >= 1.7, where
216 we know that it is UTF-16. */
217 #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
218 if (sizeof(wchar_t) == 4) {
219 from_index
= ei_ucs4internal
;
222 if (sizeof(wchar_t) == 2) {
223 # if WORDS_LITTLEENDIAN
224 from_index
= ei_utf16le
;
226 from_index
= ei_utf16be
;
230 #elif __STDC_ISO_10646__
231 if (sizeof(wchar_t) == 4) {
232 from_index
= ei_ucs4internal
;
235 if (sizeof(wchar_t) == 2) {
236 from_index
= ei_ucs2internal
;
239 if (sizeof(wchar_t) == 1) {
240 from_index
= ei_iso8859_1
;
246 fromcode
= locale_charset();
251 from_index
= ap
->encoding_index
;