2 * Copyright (C) 1999-2002, 2004-2011, 2016, 2022-2023 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either version 2.1
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <https://www.gnu.org/licenses/>.
20 /* This file defines all the converters. */
/* Our own notion of wide character, as UCS-4, according to ISO-10646-1.
   The xxx_mbtowc and xxx_wctomb entry points exchange characters through
   this type. */
typedef unsigned int ucs4_t;
/* State used by a conversion. 0 denotes the initial state. */
typedef unsigned int state_t;
/* iconv_t is an opaque type. This is the real iconv_t type:
   a pointer to struct conv_struct. */
typedef struct conv_struct *conv_t;
33 * Data type for conversion multibyte -> unicode
36 int (*xxx_mbtowc
) (conv_t conv
, ucs4_t
*pwc
, unsigned char const *s
, size_t n
);
38 * int xxx_mbtowc (conv_t conv, ucs4_t *pwc, unsigned char const *s, size_t n)
39 * converts the byte sequence starting at s to a wide character. Up to n bytes
40 * are available at s. n is >= 1.
41 * Result is number of bytes consumed (if a wide character was read),
42 * or -1 if invalid, or -2 if n too small,
43 * or RET_SHIFT_ILSEQ(number of bytes consumed) if invalid input after a shift sequence was read,
45 * or RET_TOOFEW(number of bytes consumed) if only a shift sequence was read.
47 int (*xxx_flushwc
) (conv_t conv
, ucs4_t
*pwc
);
49 * int xxx_flushwc (conv_t conv, ucs4_t *pwc)
50 * returns to the initial state and stores the pending wide character, if any.
51 * Result is 1 (if a wide character was read) or 0 if none was pending.
/*
 * Negative return codes of xxx_mbtowc.
 *
 * A byte count n is folded into the return value: odd negative values
 * (-1, -3, -5, ...) mean "invalid input", even negative values
 * (-2, -4, -6, ...) mean "too few bytes". In both cases n is the number
 * of shift-sequence bytes that were already consumed.
 */

/* Return code if invalid input after a shift sequence of n bytes was read.
   (xxx_mbtowc) */
#define RET_SHIFT_ILSEQ(n)  (-1-2*(n))
/* Return code if invalid. (xxx_mbtowc) */
#define RET_ILSEQ           RET_SHIFT_ILSEQ(0)
/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
#define RET_TOOFEW(n)       (-2-2*(n))
/* Retrieve the n from the encoded RET_... value. */
#define DECODE_SHIFT_ILSEQ(r)  ((unsigned int)(RET_SHIFT_ILSEQ(0) - (r)) / 2)
#define DECODE_TOOFEW(r)       ((unsigned int)(RET_TOOFEW(0) - (r)) / 2)
/* Maximum value of n that may be used as argument to RET_SHIFT_ILSEQ or
   RET_TOOFEW. Larger values would overflow int in the encodings above.
   INT_MAX comes from <limits.h>. */
#define RET_COUNT_MAX       ((INT_MAX / 2) - 1)
69 * Data type for conversion unicode -> multibyte
72 int (*xxx_wctomb
) (conv_t conv
, unsigned char *r
, ucs4_t wc
, size_t n
);
74 * int xxx_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
75 * converts the wide character wc to the character set xxx, and stores the
76 * result beginning at r. Up to n bytes may be written at r. n is >= 1.
77 * Result is number of bytes written, or -1 if invalid, or -2 if n too small.
79 int (*xxx_reset
) (conv_t conv
, unsigned char *r
, size_t n
);
81 * int xxx_reset (conv_t conv, unsigned char *r, size_t n)
82 * stores a shift sequence returning to the initial state beginning at r.
83 * Up to n bytes may be written at r. n is >= 0.
84 * Result is number of bytes written, or -2 if n too small.
88 /* Return code if invalid. (xxx_wctomb) */
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset)
   Parenthesized so the negative constant is safe in any expression. */
#define RET_TOOSMALL   (-2)
94 * Contents of a conversion descriptor.
97 struct loop_funcs lfuncs
;
98 /* Input (conversion multibyte -> unicode) */
100 struct mbtowc_funcs ifuncs
;
101 unsigned int isurface
;
103 /* Output (conversion unicode -> multibyte) */
105 struct wctomb_funcs ofuncs
;
107 unsigned int osurface
;
109 /* Operation flags */
112 #ifndef LIBICONV_PLUG
113 struct iconv_fallbacks fallbacks
;
114 struct iconv_hooks hooks
;
119 * Include all the converters.
124 /* General multi-byte encodings */
139 #include "ucs2internal.h"
140 #include "ucs2swapped.h"
141 #include "ucs4internal.h"
142 #include "ucs4swapped.h"
146 /* 8-bit encodings */
147 #include "iso8859_1.h"
148 #include "iso8859_2.h"
149 #include "iso8859_3.h"
150 #include "iso8859_4.h"
151 #include "iso8859_5.h"
152 #include "iso8859_6.h"
153 #include "iso8859_7.h"
154 #include "iso8859_8.h"
155 #include "iso8859_9.h"
156 #include "iso8859_10.h"
157 #include "iso8859_11.h"
158 #include "iso8859_13.h"
159 #include "iso8859_14.h"
160 #include "iso8859_15.h"
161 #include "iso8859_16.h"
178 #include "mac_roman.h"
179 #include "mac_centraleurope.h"
180 #include "mac_iceland.h"
181 #include "mac_croatian.h"
182 #include "mac_romania.h"
183 #include "mac_cyrillic.h"
184 #include "mac_ukraine.h"
185 #include "mac_greek.h"
186 #include "mac_turkish.h"
187 #include "mac_hebrew.h"
188 #include "mac_arabic.h"
189 #include "mac_thai.h"
190 #include "hp_roman8.h"
191 #include "nextstep.h"
192 #include "armscii_8.h"
193 #include "georgian_academy.h"
194 #include "georgian_ps.h"
205 /* CJK character sets [CCS = coded character set] [CJKV.INF chapter 3] */
208 unsigned short indx
; /* index into big table */
209 unsigned short used
; /* bitmask of used entries */
212 #include "iso646_jp.h"
213 #include "jisx0201.h"
214 #include "jisx0208.h"
215 #include "jisx0212.h"
217 #include "iso646_cn.h"
219 #include "isoir165.h"
220 /*#include "gb12345.h"*/
222 #include "cns11643.h"
226 #include "johab_hangul.h"
228 /* CJK encodings [CES = character encoding scheme] [CJKV.INF chapter 4] */
233 #include "iso2022_jp.h"
234 #include "iso2022_jp1.h"
235 #include "iso2022_jp2.h"
236 #include "iso2022_jpms.h"
241 #include "gb18030_2005.h"
242 #include "gb18030_2022.h"
243 #include "iso2022_cn.h"
244 #include "iso2022_cnext.h"
247 #include "ces_big5.h"
249 #include "big5hkscs1999.h"
250 #include "big5hkscs2001.h"
251 #include "big5hkscs2004.h"
252 #include "big5hkscs2008.h"
257 #include "iso2022_kr.h"
259 /* Encodings used by system dependent locales. */
274 #include "dec_kanji.h"
275 #include "dec_hanyu.h"
/* NOTE(review): presumably lets the converters included below share
   identical lookup tables instead of duplicating them; confirm against
   the headers that test this macro. */
#define DEDUPLICATE_TABLES 1
/* Swaps the values 0x15 and 0x25; every other value is returned unchanged.
   Both gcc and clang compile this expression to something that involves as few
   conditional branching instructions as possible.

   Three equivalent formulations are provided:
     _a  nested conditional expressions;
     _b  XOR with 0x30 when x is 0x15 or 0x25 (0x15 ^ 0x25 == 0x30);
     _c  like _b, but tests both values at once: (x - 0x15) with bit 0x10
         cleared is zero exactly for x == 0x15 and x == 0x25.
   Each macro evaluates its argument more than once, so the argument must
   not have side effects. */
#define swap_x15_x25_a(x) ((x) == 0x15 ? 0x25 : (x) == 0x25 ? 0x15 : (x))
#define swap_x15_x25_b(x) ((x) ^ ((x) == 0x15 || (x) == 0x25 ? 0x30 : 0))
#define swap_x15_x25_c(x) ((x) ^ ((((x) - 0x15) & ~0x10) == 0 ? 0x30 : 0))
/* Number of conditional branches (with "gcc -O2", as of 2023): per-target
   figures are not listed here. The _c form is the one used. */
#define swap_x15_x25 swap_x15_x25_c
327 #include "ebcdic037.h"
328 #include "ebcdic273.h"
329 #include "ebcdic277.h"
330 #include "ebcdic278.h"
331 #include "ebcdic280.h"
332 #include "ebcdic282.h"
333 #include "ebcdic284.h"
334 #include "ebcdic285.h"
335 #include "ebcdic297.h"
336 #include "ebcdic423.h"
337 #include "ebcdic424.h"
338 #include "ebcdic425.h"
339 #include "ebcdic500.h"
340 #include "ebcdic838.h"
341 #include "ebcdic870.h"
342 #include "ebcdic871.h"
343 #include "ebcdic875.h"
344 #include "ebcdic880.h"
345 #include "ebcdic905.h"
346 #include "ebcdic924.h"
347 #include "ebcdic1025.h"
348 #include "ebcdic1026.h"
349 #include "ebcdic1047.h"
350 #include "ebcdic1097.h"
351 #include "ebcdic1112.h"
352 #include "ebcdic1122.h"
353 #include "ebcdic1123.h"
354 #include "ebcdic1130.h"
355 #include "ebcdic1132.h"
356 #include "ebcdic1137.h"
357 #include "ebcdic1140.h"
358 #include "ebcdic1141.h"
359 #include "ebcdic1142.h"
360 #include "ebcdic1143.h"
361 #include "ebcdic1144.h"
362 #include "ebcdic1145.h"
363 #include "ebcdic1146.h"
364 #include "ebcdic1147.h"
365 #include "ebcdic1148.h"
366 #include "ebcdic1149.h"
367 #include "ebcdic1153.h"
368 #include "ebcdic1154.h"
369 #include "ebcdic1155.h"
370 #include "ebcdic1156.h"
371 #include "ebcdic1157.h"
372 #include "ebcdic1158.h"
373 #include "ebcdic1160.h"
374 #include "ebcdic1164.h"
375 #include "ebcdic1165.h"
376 #include "ebcdic1166.h"
377 #include "ebcdic4971.h"
378 #include "ebcdic12712.h"
379 #include "ebcdic16804.h"
383 #include "euc_jisx0213.h"
384 #include "shift_jisx0213.h"
385 #include "iso2022_jp3.h"
386 #include "big5_2003.h"