2 * Copyright (C) 1999-2008, 2011 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
21 /* Part 1 of iconv_open.
22 Input: const char* tocode, const char* fromcode.
24 unsigned int from_index;
26 unsigned int to_index;
30 Jumps to 'invalid' in case of error.
33 char buf
[MAX_WORD_LENGTH
+10+1];
36 const struct alias
* ap
;
42 /* Before calling aliases_lookup, convert the input string to upper case,
43 * and check whether it's entirely ASCII (we call gperf with option "-7"
44 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
45 * or if it's too long, it is not a valid encoding name.
47 for (to_wchar
= 0;;) {
48 /* Search tocode in the table. */
49 for (cp
= tocode
, bp
= buf
, count
= MAX_WORD_LENGTH
+10+1; ; cp
++, bp
++) {
50 unsigned char c
= * (unsigned char *) cp
;
53 if (c
>= 'a' && c
<= 'z')
62 if (bp
-buf
>= 10 && memcmp(bp
-10,"//TRANSLIT",10)==0) {
68 if (bp
-buf
>= 8 && memcmp(bp
-8,"//IGNORE",8)==0) {
77 tocode
= locale_charset();
78 /* Avoid an endless loop that could occur when using an older version
80 if (tocode
[0] == '\0')
84 ap
= aliases_lookup(buf
,bp
-buf
);
86 ap
= aliases2_lookup(buf
);
90 if (ap
->encoding_index
== ei_local_char
) {
91 tocode
= locale_charset();
92 /* Avoid an endless loop that could occur when using an older version
94 if (tocode
[0] == '\0')
98 if (ap
->encoding_index
== ei_local_wchar_t
) {
99 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
100 This is also the case on native Woe32 systems and Cygwin >= 1.7, where
101 we know that it is UTF-16. */
102 #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
103 if (sizeof(wchar_t) == 4) {
104 to_index
= ei_ucs4internal
;
107 if (sizeof(wchar_t) == 2) {
108 # if WORDS_LITTLEENDIAN
109 to_index
= ei_utf16le
;
111 to_index
= ei_utf16be
;
115 #elif __STDC_ISO_10646__
116 if (sizeof(wchar_t) == 4) {
117 to_index
= ei_ucs4internal
;
120 if (sizeof(wchar_t) == 2) {
121 to_index
= ei_ucs2internal
;
124 if (sizeof(wchar_t) == 1) {
125 to_index
= ei_iso8859_1
;
131 tocode
= locale_charset();
136 to_index
= ap
->encoding_index
;
139 for (from_wchar
= 0;;) {
140 /* Search fromcode in the table. */
141 for (cp
= fromcode
, bp
= buf
, count
= MAX_WORD_LENGTH
+10+1; ; cp
++, bp
++) {
142 unsigned char c
= * (unsigned char *) cp
;
145 if (c
>= 'a' && c
<= 'z')
154 if (bp
-buf
>= 10 && memcmp(bp
-10,"//TRANSLIT",10)==0) {
159 if (bp
-buf
>= 8 && memcmp(bp
-8,"//IGNORE",8)==0) {
166 if (buf
[0] == '\0') {
167 fromcode
= locale_charset();
168 /* Avoid an endless loop that could occur when using an older version
169 of localcharset.c. */
170 if (fromcode
[0] == '\0')
174 ap
= aliases_lookup(buf
,bp
-buf
);
176 ap
= aliases2_lookup(buf
);
180 if (ap
->encoding_index
== ei_local_char
) {
181 fromcode
= locale_charset();
182 /* Avoid an endless loop that could occur when using an older version
183 of localcharset.c. */
184 if (fromcode
[0] == '\0')
188 if (ap
->encoding_index
== ei_local_wchar_t
) {
189 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
190 This is also the case on native Woe32 systems and Cygwin >= 1.7, where
191 we know that it is UTF-16. */
192 #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
193 if (sizeof(wchar_t) == 4) {
194 from_index
= ei_ucs4internal
;
197 if (sizeof(wchar_t) == 2) {
198 # if WORDS_LITTLEENDIAN
199 from_index
= ei_utf16le
;
201 from_index
= ei_utf16be
;
205 #elif __STDC_ISO_10646__
206 if (sizeof(wchar_t) == 4) {
207 from_index
= ei_ucs4internal
;
210 if (sizeof(wchar_t) == 2) {
211 from_index
= ei_ucs2internal
;
214 if (sizeof(wchar_t) == 1) {
215 from_index
= ei_iso8859_1
;
221 fromcode
= locale_charset();
226 from_index
= ap
->encoding_index
;