2 * Copyright (C) 1999-2008, 2011, 2018, 2020 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either version 2.1
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <https://www.gnu.org/licenses/>.
20 /* Part 1 of iconv_open.
21 Input: const char* tocode, const char* fromcode.
23 unsigned int from_index;
25 unsigned int to_index;
29 Jumps to 'invalid' in case of error.
32 char buf
[MAX_WORD_LENGTH
+10+1];
35 const struct alias
* ap
;
41 /* Before calling aliases_lookup, convert the input string to upper case,
42 * and check whether it's entirely ASCII (we call gperf with option "-7"
43 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
44 * or if it's too long, it is not a valid encoding name.
45 */
46 for (to_wchar
= 0;;) {
47 /* Search tocode in the table. */
48 for (cp
= tocode
, bp
= buf
, count
= MAX_WORD_LENGTH
+10+1; ; cp
++, bp
++) {
49 unsigned char c
= (unsigned char) *cp
;
52 if (c
>= 'a' && c
<= 'z')
61 if (bp
-buf
>= 10 && memcmp(bp
-10,"//TRANSLIT",10)==0) {
67 if (bp
-buf
>= 8 && memcmp(bp
-8,"//IGNORE",8)==0) {
76 tocode
= locale_charset();
77 /* Avoid an endless loop that could occur when using an older version
78 of localcharset.c. */
79 if (tocode
[0] == '\0')
83 ap
= aliases_lookup(buf
,bp
-buf
);
85 ap
= aliases2_lookup(buf
);
89 if (ap
->encoding_index
== ei_local_char
) {
90 tocode
= locale_charset();
91 /* Avoid an endless loop that could occur when using an older version
92 of localcharset.c. */
93 if (tocode
[0] == '\0')
97 if (ap
->encoding_index
== ei_local_wchar_t
) {
98 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
99 This is also the case on native Woe32 systems and Cygwin >= 1.7, where
100 we know that it is UTF-16. */
101 #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
102 if (sizeof(wchar_t) == 4) {
103 to_index
= ei_ucs4internal
;
106 if (sizeof(wchar_t) == 2) {
107 # if WORDS_LITTLEENDIAN
108 to_index
= ei_utf16le
;
110 to_index
= ei_utf16be
;
114 #elif __STDC_ISO_10646__
115 if (sizeof(wchar_t) == 4) {
116 to_index
= ei_ucs4internal
;
119 if (sizeof(wchar_t) == 2) {
120 to_index
= ei_ucs2internal
;
123 if (sizeof(wchar_t) == 1) {
124 to_index
= ei_iso8859_1
;
130 tocode
= locale_charset();
135 to_index
= ap
->encoding_index
;
138 for (from_wchar
= 0;;) {
139 /* Search fromcode in the table. */
140 for (cp
= fromcode
, bp
= buf
, count
= MAX_WORD_LENGTH
+10+1; ; cp
++, bp
++) {
141 unsigned char c
= (unsigned char) *cp
;
144 if (c
>= 'a' && c
<= 'z')
153 if (bp
-buf
>= 10 && memcmp(bp
-10,"//TRANSLIT",10)==0) {
158 if (bp
-buf
>= 8 && memcmp(bp
-8,"//IGNORE",8)==0) {
165 if (buf
[0] == '\0') {
166 fromcode
= locale_charset();
167 /* Avoid an endless loop that could occur when using an older version
168 of localcharset.c. */
169 if (fromcode
[0] == '\0')
173 ap
= aliases_lookup(buf
,bp
-buf
);
175 ap
= aliases2_lookup(buf
);
179 if (ap
->encoding_index
== ei_local_char
) {
180 fromcode
= locale_charset();
181 /* Avoid an endless loop that could occur when using an older version
182 of localcharset.c. */
183 if (fromcode
[0] == '\0')
187 if (ap
->encoding_index
== ei_local_wchar_t
) {
188 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
189 This is also the case on native Woe32 systems and Cygwin >= 1.7, where
190 we know that it is UTF-16. */
191 #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
192 if (sizeof(wchar_t) == 4) {
193 from_index
= ei_ucs4internal
;
196 if (sizeof(wchar_t) == 2) {
197 # if WORDS_LITTLEENDIAN
198 from_index
= ei_utf16le
;
200 from_index
= ei_utf16be
;
204 #elif __STDC_ISO_10646__
205 if (sizeof(wchar_t) == 4) {
206 from_index
= ei_ucs4internal
;
209 if (sizeof(wchar_t) == 2) {
210 from_index
= ei_ucs2internal
;
213 if (sizeof(wchar_t) == 1) {
214 from_index
= ei_iso8859_1
;
220 fromcode
= locale_charset();
225 from_index
= ap
->encoding_index
;