Prefer #include <...> for system headers.
[libiconv.git] / lib / iconv_open1.h
bloba7288a458ad7b5e8b9833d97ffe4715bf75f0430
1 /*
2 * Copyright (C) 1999-2008, 2011, 2018, 2020, 2023 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either version 2.1
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <https://www.gnu.org/licenses/>.
20 /* Part 1 of iconv_open.
21 Input: const char* tocode, const char* fromcode.
22 Output:
23 unsigned int from_index;
24 int from_wchar;
25 unsigned int from_surface;
26 unsigned int to_index;
27 int to_wchar;
28 unsigned int to_surface;
29 int transliterate;
30 int discard_ilseq;
31 Jumps to 'invalid' in case of error.
34 char buf[MAX_WORD_LENGTH+9+9+1]; /* scratch copy of the upper-cased name; its size equals the count limit used by the copy loops */
35 const char* cp; /* read cursor into tocode / fromcode */
36 char* bp; /* write cursor into buf; later the end of the (possibly shortened) name */
37 const struct alias * ap; /* result of aliases_lookup / aliases2_lookup */
38 unsigned int count; /* remaining capacity while copying into buf */
40 from_surface = ICONV_SURFACE_NONE; /* default; this part only changes it when a "/ZOS_UNIX" suffix is matched on fromcode */
41 to_surface = ICONV_SURFACE_NONE; /* default; this part only changes it when a "/ZOS_UNIX" suffix is matched on tocode */
42 transliterate = 0; /* becomes 1 when a "/TRANSLIT" suffix is accepted below */
43 discard_ilseq = 0; /* becomes 1 when an "/IGNORE" suffix is accepted below */
45 /* Before calling aliases_lookup, convert the input string to upper case,
46 * and check whether it's entirely ASCII (we call gperf with option "-7"
47 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
48 * or if it's too long, it is not a valid encoding name.
50 for (to_wchar = 0;;) { /* Resolve tocode to to_index. to_wchar is zeroed only in this init clause, so a 'continue' keeps a value assigned further down. */
51 /* Search tocode in the table. */
52 for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+9+9+1; ; cp++, bp++) { /* copy tocode into buf, upper-casing ASCII letters; count bounds the copy to the size of buf */
53 unsigned char c = (unsigned char) *cp;
54 if (c >= 0x80) /* non-ASCII byte: not a valid encoding name */
55 goto invalid;
56 if (c >= 'a' && c <= 'z')
57 c -= 'a'-'A'; /* ASCII lower case to upper case */
58 *bp = c;
59 if (c == '\0') /* terminator stored; bp is left pointing at it */
60 break;
61 if (--count == 0) /* name does not fit into buf */
62 goto invalid;
64 for (;;) { /* suffix handling; every visible path through it ends in 'break' */
65 char *sp = bp; /* candidate new end of the name */
66 int parsed_translit = 0;
67 int parsed_ignore = 0;
68 if (sp-buf > 9 && memcmp(sp-9,"/TRANSLIT",9)==0) { /* '>' rather than '>=': at least one character must precede the suffix */
69 sp = sp - 9;
70 parsed_translit = 1;
71 } else if (sp-buf > 7 && memcmp(sp-7,"/IGNORE",7)==0) { /* same rule for "/IGNORE" */
72 sp = sp - 7;
73 parsed_ignore = 1;
75 if (sp > buf && memcmp(sp-1,"/",1) == 0) { /* one more '/' directly before sp is dropped as well */
76 bp = sp - 1;
77 } else if (sp-buf >= 9 && memcmp(sp-9,"/ZOS_UNIX",9)==0) { /* surface suffix directly before sp */
78 bp = sp - 9;
79 to_surface = ICONV_SURFACE_EBCDIC_ZOS_UNIX;
80 } else
81 break; /* neither matched: buf, bp and the two flags are left untouched */
82 *bp = '\0'; /* cut buf at the new end */
83 if (parsed_translit)
84 transliterate = 1;
85 if (parsed_ignore)
86 discard_ilseq = 1;
87 break;
89 if (buf[0] == '\0') { /* empty name: substitute the string returned by locale_charset() */
90 tocode = locale_charset();
91 /* Avoid an endless loop that could occur when using an older version
92 of localcharset.c. */
93 if (tocode[0] == '\0')
94 goto invalid;
95 continue;
97 ap = aliases_lookup(buf,bp-buf); /* bp-buf is the length of the name left in buf */
98 if (ap == NULL) {
99 ap = aliases2_lookup(buf); /* second lookup, tried only when the first one returns NULL */
100 if (ap == NULL)
101 goto invalid;
103 if (ap->encoding_index == ei_local_char) { /* alias stands for the locale's char encoding: substitute locale_charset() */
104 tocode = locale_charset();
105 /* Avoid an endless loop that could occur when using an older version
106 of localcharset.c. */
107 if (tocode[0] == '\0')
108 goto invalid;
109 continue;
111 if (ap->encoding_index == ei_local_wchar_t) { /* alias stands for wchar_t: choose an encoding from sizeof(wchar_t) where the platform macros allow it */
112 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
113 This is also the case on native Woe32 systems and Cygwin >= 1.7, where
114 we know that it is UTF-16. */
115 #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
116 if (sizeof(wchar_t) == 4) {
117 to_index = ei_ucs4internal;
118 break;
120 if (sizeof(wchar_t) == 2) {
121 # if WORDS_LITTLEENDIAN
122 to_index = ei_utf16le;
123 # else
124 to_index = ei_utf16be;
125 # endif
126 break;
128 #elif __STDC_ISO_10646__
129 if (sizeof(wchar_t) == 4) {
130 to_index = ei_ucs4internal;
131 break;
133 if (sizeof(wchar_t) == 2) {
134 to_index = ei_ucs2internal;
135 break;
137 if (sizeof(wchar_t) == 1) {
138 to_index = ei_iso8859_1;
139 break;
141 #endif
142 #if HAVE_MBRTOWC
143 to_wchar = 1; /* record the wchar_t request, then go round again with the locale charset name */
144 tocode = locale_charset();
145 continue;
146 #endif
147 goto invalid; /* reached only when no conditional path above did a break or continue */
149 to_index = ap->encoding_index; /* ordinary alias: use its index directly */
150 break;
152 for (from_wchar = 0;;) { /* Resolve fromcode to from_index. from_wchar is zeroed only in this init clause, so a 'continue' keeps a value assigned further down. */
153 /* Search fromcode in the table. */
154 for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+9+9+1; ; cp++, bp++) { /* copy fromcode into buf, upper-casing ASCII letters; count bounds the copy to the size of buf */
155 unsigned char c = (unsigned char) *cp;
156 if (c >= 0x80) /* non-ASCII byte: not a valid encoding name */
157 goto invalid;
158 if (c >= 'a' && c <= 'z')
159 c -= 'a'-'A'; /* ASCII lower case to upper case */
160 *bp = c;
161 if (c == '\0') /* terminator stored; bp is left pointing at it */
162 break;
163 if (--count == 0) /* name does not fit into buf */
164 goto invalid;
166 for (;;) { /* suffix handling; every visible path through it ends in 'break' */
167 char *sp = bp; /* candidate new end of the name */
168 int parsed_translit = 0;
169 int parsed_ignore = 0;
170 if (sp-buf > 9 && memcmp(sp-9,"/TRANSLIT",9)==0) { /* '>' rather than '>=': at least one character must precede the suffix */
171 sp = sp - 9;
172 parsed_translit = 1;
173 } else if (sp-buf > 7 && memcmp(sp-7,"/IGNORE",7)==0) { /* same rule for "/IGNORE" */
174 sp = sp - 7;
175 parsed_ignore = 1;
177 if (sp > buf && memcmp(sp-1,"/",1) == 0) { /* one more '/' directly before sp is dropped as well */
178 bp = sp - 1;
179 } else if (sp-buf >= 9 && memcmp(sp-9,"/ZOS_UNIX",9)==0) { /* surface suffix directly before sp */
180 bp = sp - 9;
181 from_surface = ICONV_SURFACE_EBCDIC_ZOS_UNIX;
182 } else
183 break; /* neither matched: buf, bp and the two flags are left untouched */
184 *bp = '\0'; /* cut buf at the new end */
185 if (parsed_translit)
186 transliterate = 1; /* same flag the tocode pass sets */
187 if (parsed_ignore)
188 discard_ilseq = 1; /* same flag the tocode pass sets */
189 break;
191 if (buf[0] == '\0') { /* empty name: substitute the string returned by locale_charset() */
192 fromcode = locale_charset();
193 /* Avoid an endless loop that could occur when using an older version
194 of localcharset.c. */
195 if (fromcode[0] == '\0')
196 goto invalid;
197 continue;
199 ap = aliases_lookup(buf,bp-buf); /* bp-buf is the length of the name left in buf */
200 if (ap == NULL) {
201 ap = aliases2_lookup(buf); /* second lookup, tried only when the first one returns NULL */
202 if (ap == NULL)
203 goto invalid;
205 if (ap->encoding_index == ei_local_char) { /* alias stands for the locale's char encoding: substitute locale_charset() */
206 fromcode = locale_charset();
207 /* Avoid an endless loop that could occur when using an older version
208 of localcharset.c. */
209 if (fromcode[0] == '\0')
210 goto invalid;
211 continue;
213 if (ap->encoding_index == ei_local_wchar_t) { /* alias stands for wchar_t: choose an encoding from sizeof(wchar_t) where the platform macros allow it */
214 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
215 This is also the case on native Woe32 systems and Cygwin >= 1.7, where
216 we know that it is UTF-16. */
217 #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
218 if (sizeof(wchar_t) == 4) {
219 from_index = ei_ucs4internal;
220 break;
222 if (sizeof(wchar_t) == 2) {
223 # if WORDS_LITTLEENDIAN
224 from_index = ei_utf16le;
225 # else
226 from_index = ei_utf16be;
227 # endif
228 break;
230 #elif __STDC_ISO_10646__
231 if (sizeof(wchar_t) == 4) {
232 from_index = ei_ucs4internal;
233 break;
235 if (sizeof(wchar_t) == 2) {
236 from_index = ei_ucs2internal;
237 break;
239 if (sizeof(wchar_t) == 1) {
240 from_index = ei_iso8859_1;
241 break;
243 #endif
244 #if HAVE_WCRTOMB
245 from_wchar = 1; /* record the wchar_t request, then go round again with the locale charset name */
246 fromcode = locale_charset();
247 continue;
248 #endif
249 goto invalid; /* reached only when no conditional path above did a break or continue */
251 from_index = ap->encoding_index; /* ordinary alias: use its index directly */
252 break;