Update after gnulib changed.
[libiconv.git] / lib / iconv_open1.h
blobc6b24efde0554c8c8b34277371f4df501edb48a4
1 /*
2 * Copyright (C) 1999-2008, 2011, 2018, 2020 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either version 2.1
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <https://www.gnu.org/licenses/>.
20 /* Part 1 of iconv_open.
21 Input: const char* tocode, const char* fromcode.
22 Output:
23 unsigned int from_index;
24 int from_wchar;
25 unsigned int to_index;
26 int to_wchar;
27 int transliterate;
28 int discard_ilseq;
29 Jumps to 'invalid' in case of error.
32 char buf[MAX_WORD_LENGTH+10+1]; /* receives the upper-cased copy of the encoding name being looked up */
33 const char* cp; /* read cursor over tocode / fromcode */
34 char* bp; /* write cursor into buf; ends up on the terminating NUL */
35 const struct alias * ap; /* result of aliases_lookup / aliases2_lookup */
36 unsigned int count; /* bytes of buf still available while copying */
38 transliterate = 0; /* set to 1 when tocode carries a //TRANSLIT suffix */
39 discard_ilseq = 0; /* set to 1 when tocode carries an //IGNORE suffix */
41 /* Before calling aliases_lookup, convert the input string to upper case,
42 * and check whether it's entirely ASCII (we call gperf with option "-7"
43 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
44 * or if it's too long, it is not a valid encoding name.
46 for (to_wchar = 0;;) {
47 /* Search tocode in the table.
      First copy tocode into buf, converting ASCII lower-case letters to
      upper case.  A byte >= 0x80, or a name that does not fit into buf
      together with its terminating NUL, jumps to 'invalid'.  */
48 for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
49 unsigned char c = (unsigned char) *cp;
50 if (c >= 0x80)
51 goto invalid;
52 if (c >= 'a' && c <= 'z')
53 c -= 'a'-'A';
54 *bp = c;
55 if (c == '\0')
56 break;
57 if (--count == 0)
58 goto invalid;
/* Strip trailing "//TRANSLIT" and "//IGNORE" suffixes, repeatedly and in
   either order, recording each one in transliterate / discard_ilseq.
   bp is moved back so that it keeps pointing at the terminating NUL.  */
60 for (;;) {
61 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
62 bp -= 10;
63 *bp = '\0';
64 transliterate = 1;
65 continue;
67 if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
68 bp -= 8;
69 *bp = '\0';
70 discard_ilseq = 1;
71 continue;
73 break;
/* An empty name (after suffix removal) is replaced by the result of
   locale_charset() and the outer loop is restarted with it.  */
75 if (buf[0] == '\0') {
76 tocode = locale_charset();
77 /* Avoid an endless loop that could occur when using an older version
78 of localcharset.c. */
79 if (tocode[0] == '\0')
80 goto invalid;
81 continue;
/* Look the name up with aliases_lookup first and fall back to
   aliases2_lookup; a name unknown to both is invalid.  */
83 ap = aliases_lookup(buf,bp-buf);
84 if (ap == NULL) {
85 ap = aliases2_lookup(buf);
86 if (ap == NULL)
87 goto invalid;
/* The alias resolved to ei_local_char: restart the lookup with the
   name returned by locale_charset().  */
89 if (ap->encoding_index == ei_local_char) {
90 tocode = locale_charset();
91 /* Avoid an endless loop that could occur when using an older version
92 of localcharset.c. */
93 if (tocode[0] == '\0')
94 goto invalid;
95 continue;
/* The alias resolved to ei_local_wchar_t: pick a concrete encoding index
   from sizeof(wchar_t) where the preprocessor conditions below allow it.  */
97 if (ap->encoding_index == ei_local_wchar_t) {
98 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
99 This is also the case on native Woe32 systems and Cygwin >= 1.7, where
100 we know that it is UTF-16. */
101 #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
102 if (sizeof(wchar_t) == 4) {
103 to_index = ei_ucs4internal;
104 break;
106 if (sizeof(wchar_t) == 2) {
107 # if WORDS_LITTLEENDIAN
108 to_index = ei_utf16le;
109 # else
110 to_index = ei_utf16be;
111 # endif
112 break;
114 #elif __STDC_ISO_10646__
115 if (sizeof(wchar_t) == 4) {
116 to_index = ei_ucs4internal;
117 break;
119 if (sizeof(wchar_t) == 2) {
120 to_index = ei_ucs2internal;
121 break;
123 if (sizeof(wchar_t) == 1) {
124 to_index = ei_iso8859_1;
125 break;
127 #endif
/* No fixed encoding was selected above: when HAVE_MBRTOWC is set, flag the
   target as wchar_t via to_wchar and restart the lookup with the name
   returned by locale_charset().  */
128 #if HAVE_MBRTOWC
129 to_wchar = 1;
130 tocode = locale_charset();
131 continue;
132 #endif
/* Reached only when none of the branches above left this iteration.  */
133 goto invalid;
/* Ordinary encoding: use the index stored in the alias entry.  */
135 to_index = ap->encoding_index;
136 break;
138 for (from_wchar = 0;;) {
139 /* Search fromcode in the table.
       First copy fromcode into buf, converting ASCII lower-case letters to
       upper case.  A byte >= 0x80, or a name that does not fit into buf
       together with its terminating NUL, jumps to 'invalid'.  */
140 for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
141 unsigned char c = (unsigned char) *cp;
142 if (c >= 0x80)
143 goto invalid;
144 if (c >= 'a' && c <= 'z')
145 c -= 'a'-'A';
146 *bp = c;
147 if (c == '\0')
148 break;
149 if (--count == 0)
150 goto invalid;
/* Strip trailing "//TRANSLIT" and "//IGNORE" suffixes, repeatedly and in
   either order.  Unlike the tocode handling, the suffixes are only
   removed here: transliterate and discard_ilseq are not modified.  */
152 for (;;) {
153 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
154 bp -= 10;
155 *bp = '\0';
156 continue;
158 if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
159 bp -= 8;
160 *bp = '\0';
161 continue;
163 break;
/* An empty name (after suffix removal) is replaced by the result of
   locale_charset() and the outer loop is restarted with it.  */
165 if (buf[0] == '\0') {
166 fromcode = locale_charset();
167 /* Avoid an endless loop that could occur when using an older version
168 of localcharset.c. */
169 if (fromcode[0] == '\0')
170 goto invalid;
171 continue;
/* Look the name up with aliases_lookup first and fall back to
   aliases2_lookup; a name unknown to both is invalid.  */
173 ap = aliases_lookup(buf,bp-buf);
174 if (ap == NULL) {
175 ap = aliases2_lookup(buf);
176 if (ap == NULL)
177 goto invalid;
/* The alias resolved to ei_local_char: restart the lookup with the
   name returned by locale_charset().  */
179 if (ap->encoding_index == ei_local_char) {
180 fromcode = locale_charset();
181 /* Avoid an endless loop that could occur when using an older version
182 of localcharset.c. */
183 if (fromcode[0] == '\0')
184 goto invalid;
185 continue;
/* The alias resolved to ei_local_wchar_t: pick a concrete encoding index
   from sizeof(wchar_t) where the preprocessor conditions below allow it.  */
187 if (ap->encoding_index == ei_local_wchar_t) {
188 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
189 This is also the case on native Woe32 systems and Cygwin >= 1.7, where
190 we know that it is UTF-16. */
191 #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
192 if (sizeof(wchar_t) == 4) {
193 from_index = ei_ucs4internal;
194 break;
196 if (sizeof(wchar_t) == 2) {
197 # if WORDS_LITTLEENDIAN
198 from_index = ei_utf16le;
199 # else
200 from_index = ei_utf16be;
201 # endif
202 break;
204 #elif __STDC_ISO_10646__
205 if (sizeof(wchar_t) == 4) {
206 from_index = ei_ucs4internal;
207 break;
209 if (sizeof(wchar_t) == 2) {
210 from_index = ei_ucs2internal;
211 break;
213 if (sizeof(wchar_t) == 1) {
214 from_index = ei_iso8859_1;
215 break;
217 #endif
/* No fixed encoding was selected above: when HAVE_WCRTOMB is set, flag the
   source as wchar_t via from_wchar and restart the lookup with the name
   returned by locale_charset().  */
218 #if HAVE_WCRTOMB
219 from_wchar = 1;
220 fromcode = locale_charset();
221 continue;
222 #endif
/* Reached only when none of the branches above left this iteration.  */
223 goto invalid;
/* Ordinary encoding: use the index stored in the alias entry.  */
225 from_index = ap->encoding_index;
226 break;