2 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18 * Suite 330, Boston, MA 02111-1307, USA.
21 /* This file defines all the converters. */
/* Our own notion of wide character, as UCS-4, according to ISO-10646-1. */
typedef unsigned int ucs4_t;
/* State used by a conversion. 0 denotes the initial state. */
typedef unsigned int state_t;
/* iconv_t is an opaque type. This is the real iconv_t type. */
typedef struct conv_struct *conv_t;
/* Data type for conversion multibyte -> unicode */
37 int (*xxx_mbtowc
) (conv_t conv
, ucs4_t
*pwc
, unsigned char const *s
, int n
);
39 * int xxx_mbtowc (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n)
40 * converts the byte sequence starting at s to a wide character. Up to n bytes
41 * are available at s. n is >= 1.
42 * Result is number of bytes consumed (if a wide character was read),
43 * or 0 if invalid, or -1 if n too small, or -1-(number of bytes consumed)
44 * if only a shift sequence was read.
/* Data type for conversion unicode -> multibyte */
52 int (*xxx_wctomb
) (conv_t conv
, unsigned char *r
, ucs4_t wc
, int n
);
54 * int xxx_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
55 * converts the wide character wc to the character set xxx, and stores the
56 * result beginning at r. Up to n bytes may be written at r. n is >= 1.
57 * Result is number of bytes written, or 0 if invalid, or -1 if n too small.
59 int (*xxx_reset
) (conv_t conv
, unsigned char *r
, int n
);
61 * int xxx_reset (conv_t conv, unsigned char *r, int n)
62 * stores a shift sequences returning to the initial state beginning at r.
63 * Up to n bytes may be written at r. n is >= 0.
64 * Result is number of bytes written, or -1 if n too small.
68 /* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */
/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
#define RET_TOOFEW(n) (-1-(n))
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
/* The replacement list is parenthesized so it expands as a single operand. */
#define RET_TOOSMALL (-1)
/* Contents of a conversion descriptor. */
79 struct loop_funcs lfuncs
;
80 /* Input (conversion multibyte -> unicode) */
82 struct mbtowc_funcs ifuncs
;
84 /* Output (conversion unicode -> multibyte) */
86 struct wctomb_funcs ofuncs
;
/* Include all the converters. */
99 /* General multi-byte encodings */
114 #include "ucs2internal.h"
115 #include "ucs2swapped.h"
116 #include "ucs4internal.h"
117 #include "ucs4swapped.h"
120 /* 8-bit encodings */
121 #include "iso8859_1.h"
122 #include "iso8859_2.h"
123 #include "iso8859_3.h"
124 #include "iso8859_4.h"
125 #include "iso8859_5.h"
126 #include "iso8859_6.h"
127 #include "iso8859_7.h"
128 #include "iso8859_8.h"
129 #include "iso8859_9.h"
130 #include "iso8859_10.h"
131 #include "iso8859_13.h"
132 #include "iso8859_14.h"
133 #include "iso8859_15.h"
134 #include "iso8859_16.h"
150 #include "mac_roman.h"
151 #include "mac_centraleurope.h"
152 #include "mac_iceland.h"
153 #include "mac_croatian.h"
154 #include "mac_romania.h"
155 #include "mac_cyrillic.h"
156 #include "mac_ukraine.h"
157 #include "mac_greek.h"
158 #include "mac_turkish.h"
159 #include "mac_hebrew.h"
160 #include "mac_arabic.h"
161 #include "mac_thai.h"
162 #include "hp_roman8.h"
163 #include "nextstep.h"
164 #include "armscii_8.h"
165 #include "georgian_academy.h"
166 #include "georgian_ps.h"
174 /* CJK character sets [CCS = coded character set] [CJKV.INF chapter 3] */
unsigned short indx; /* index into big table */
unsigned short used; /* bitmask of used entries */
181 #include "iso646_jp.h"
182 #include "jisx0201.h"
183 #include "jisx0208.h"
184 #include "jisx0212.h"
186 #include "iso646_cn.h"
188 #include "isoir165.h"
189 /*#include "gb12345.h"*/
191 #include "cns11643.h"
195 #include "johab_hangul.h"
197 /* CJK encodings [CES = character encoding scheme] [CJKV.INF chapter 4] */
202 #include "iso2022_jp.h"
203 #include "iso2022_jp1.h"
204 #include "iso2022_jp2.h"
209 #include "iso2022_cn.h"
210 #include "iso2022_cnext.h"
213 #include "ces_big5.h"
215 #include "big5hkscs.h"
220 #include "iso2022_kr.h"
222 /* Encodings used by system dependent locales. */
234 #include "dec_kanji.h"
235 #include "dec_hanyu.h"