CP1258 handles combining characters.
[libiconv.git] / lib / gb18030ext.h
blobe34f418933b63046185e9b2381d460abbab4e5a2
/*
 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
 * Suite 330, Boston, MA 02111-1307, USA.
 */

/*
 * GB18030 two-byte extension
 */

25 static const unsigned short gb18030ext_2uni_pagea9[13] = {
26 /* 0xa9 */
27 0x303e, 0x2ff0, 0x2ff1, 0x2ff2, 0x2ff3, 0x2ff4, 0x2ff5, 0x2ff6,
28 0x2ff7, 0x2ff8, 0x2ff9, 0x2ffa, 0x2ffb,
30 static const unsigned short gb18030ext_2uni_pagefe[95] = {
31 /* 0xfe */
32 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
33 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
34 0x2e81, 0xfffd, 0xfffd, 0xfffd, 0x2e84, 0x3473, 0x3447, 0x2e88,
35 0x2e8b, 0xfffd, 0x359e, 0x361a, 0x360e, 0x2e8c, 0x2e97, 0x396e,
36 0x3918, 0xfffd, 0x39cf, 0x39df, 0x3a73, 0x39d0, 0xfffd, 0xfffd,
37 0x3b4e, 0x3c6e, 0x3ce0, 0x2ea7, 0xfffd, 0xfffd, 0x2eaa, 0x4056,
38 0x415f, 0x2eae, 0x4337, 0x2eb3, 0x2eb6, 0x2eb7, 0xfffd, 0x43b1,
39 0x43ac, 0x2ebb, 0x43dd, 0x44d6, 0x4661, 0x464c, 0xfffd, 0x4723,
40 0x4729, 0x477c, 0x478d, 0x2eca, 0x4947, 0x497a, 0x497d, 0x4982,
41 0x4983, 0x4985, 0x4986, 0x499f, 0x499b, 0x49b7, 0x49b6, 0xfffd,
42 0xfffd, 0x4ca3, 0x4c9f, 0x4ca0, 0x4ca1, 0x4c77, 0x4ca2, 0x4d13,
43 0x4d14, 0x4d15, 0x4d16, 0x4d17, 0x4d18, 0x4d19, 0x4dae,
46 static int
47 gb18030ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
49 unsigned char c1 = s[0];
50 if ((c1 == 0xa2) || (c1 >= 0xa8 && c1 <= 0xa9) || (c1 == 0xfe)) {
51 if (n >= 2) {
52 unsigned char c2 = s[1];
53 if ((c2 >= 0x40 && c2 < 0x7f) || (c2 >= 0x80 && c2 < 0xff)) {
54 unsigned int i = 190 * (c1 - 0x81) + (c2 - (c2 >= 0x80 ? 0x41 : 0x40));
55 unsigned short wc = 0xfffd;
56 if (i < 7410) {
57 if (i == 6432)
58 wc = 0x20ac;
59 } else if (i < 23750) {
60 if (i == 7536)
61 wc = 0x01f9;
62 else if (i >= 7672 && i < 7685)
63 wc = gb18030ext_2uni_pagea9[i-7672];
64 } else {
65 if (i < 23845)
66 wc = gb18030ext_2uni_pagefe[i-23750];
68 if (wc != 0xfffd) {
69 *pwc = (ucs4_t) wc;
70 return 2;
73 return RET_ILSEQ;
75 return RET_TOOFEW(0);
77 return RET_ILSEQ;
80 static const unsigned short gb18030ext_page2e[80] = {
81 0x0000, 0xfe50, 0x0000, 0x0000, 0xfe54, 0x0000, 0x0000, 0x0000, /*0x80-0x87*/
82 0xfe57, 0x0000, 0x0000, 0xfe58, 0xfe5d, 0x0000, 0x0000, 0x0000, /*0x88-0x8f*/
83 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe5e, /*0x90-0x97*/
84 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x98-0x9f*/
85 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe6b, /*0xa0-0xa7*/
86 0x0000, 0x0000, 0xfe6e, 0x0000, 0x0000, 0x0000, 0xfe71, 0x0000, /*0xa8-0xaf*/
87 0x0000, 0x0000, 0x0000, 0xfe73, 0x0000, 0x0000, 0xfe74, 0xfe75, /*0xb0-0xb7*/
88 0x0000, 0x0000, 0x0000, 0xfe79, 0x0000, 0x0000, 0x0000, 0x0000, /*0xb8-0xbf*/
89 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xc0-0xc7*/
90 0x0000, 0x0000, 0xfe84, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xc8-0xcf*/
92 static const unsigned short gb18030ext_page2f[16] = {
93 0xa98a, 0xa98b, 0xa98c, 0xa98d, 0xa98e, 0xa98f, 0xa990, 0xa991, /*0xf0-0xf7*/
94 0xa992, 0xa993, 0xa994, 0xa995, 0x0000, 0x0000, 0x0000, 0x0000, /*0xf8-0xff*/
96 static const unsigned short gb18030ext_page34[56] = {
97 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe56, /*0x40-0x47*/
98 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x48-0x4f*/
99 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x50-0x57*/
100 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x58-0x5f*/
101 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x60-0x67*/
102 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x68-0x6f*/
103 0x0000, 0x0000, 0x0000, 0xfe55, 0x0000, 0x0000, 0x0000, 0x0000, /*0x70-0x77*/
105 static const unsigned short gb18030ext_page36[24] = {
106 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe5c, 0x0000, /*0x08-0x0f*/
107 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x10-0x17*/
108 0x0000, 0x0000, 0xfe5b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x18-0x1f*/
110 static const unsigned short gb18030ext_page39[24] = {
111 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe62, /*0xc8-0xcf*/
112 0xfe65, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xd0-0xd7*/
113 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe63, /*0xd8-0xdf*/
115 static const unsigned short gb18030ext_page43[56] = {
116 0x0000, 0x0000, 0x0000, 0x0000, 0xfe78, 0x0000, 0x0000, 0x0000, /*0xa8-0xaf*/
117 0x0000, 0xfe77, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xb0-0xb7*/
118 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xb8-0xbf*/
119 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xc0-0xc7*/
120 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xc8-0xcf*/
121 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xd0-0xd7*/
122 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe7a, 0x0000, 0x0000, /*0xd8-0xdf*/
124 static const unsigned short gb18030ext_page46[32] = {
125 0x0000, 0x0000, 0x0000, 0x0000, 0xfe7d, 0x0000, 0x0000, 0x0000, /*0x48-0x4f*/
126 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x50-0x57*/
127 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x58-0x5f*/
128 0x0000, 0xfe7c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x60-0x67*/
130 static const unsigned short gb18030ext_page47_1[16] = {
131 0x0000, 0x0000, 0x0000, 0xfe80, 0x0000, 0x0000, 0x0000, 0x0000, /*0x20-0x27*/
132 0x0000, 0xfe81, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x28-0x2f*/
134 static const unsigned short gb18030ext_page47_2[24] = {
135 0x0000, 0x0000, 0x0000, 0x0000, 0xfe82, 0x0000, 0x0000, 0x0000, /*0x78-0x7f*/
136 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x80-0x87*/
137 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe83, 0x0000, 0x0000, /*0x88-0x8f*/
139 static const unsigned short gb18030ext_page49[120] = {
140 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe85, /*0x40-0x47*/
141 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x48-0x4f*/
142 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x50-0x57*/
143 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x58-0x5f*/
144 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x60-0x67*/
145 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x68-0x6f*/
146 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x70-0x77*/
147 0x0000, 0x0000, 0xfe86, 0x0000, 0x0000, 0xfe87, 0x0000, 0x0000, /*0x78-0x7f*/
148 0x0000, 0x0000, 0xfe88, 0xfe89, 0x0000, 0xfe8a, 0xfe8b, 0x0000, /*0x80-0x87*/
149 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x88-0x8f*/
150 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x90-0x97*/
151 0x0000, 0x0000, 0x0000, 0xfe8d, 0x0000, 0x0000, 0x0000, 0xfe8c, /*0x98-0x9f*/
152 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xa0-0xa7*/
153 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xa8-0xaf*/
154 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe8f, 0xfe8e, /*0xb0-0xb7*/
156 static const unsigned short gb18030ext_page4c[56] = {
157 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe96, /*0x70-0x77*/
158 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x78-0x7f*/
159 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x80-0x87*/
160 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x88-0x8f*/
161 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x90-0x97*/
162 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe93, /*0x98-0x9f*/
163 0xfe94, 0xfe95, 0xfe97, 0xfe92, 0x0000, 0x0000, 0x0000, 0x0000, /*0xa0-0xa7*/
165 static const unsigned short gb18030ext_page4d[16] = {
166 0x0000, 0x0000, 0x0000, 0xfe98, 0xfe99, 0xfe9a, 0xfe9b, 0xfe9c, /*0x10-0x17*/
167 0xfe9d, 0xfe9e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x18-0x1f*/
170 static int
171 gb18030ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
173 if (n >= 2) {
174 unsigned short c = 0;
175 if (wc == 0x01f9)
176 c = 0xa8bf;
177 else if (wc == 0x20ac)
178 c = 0xa2e3;
179 else if (wc >= 0x2e80 && wc < 0x2ed0)
180 c = gb18030ext_page2e[wc-0x2e80];
181 else if (wc >= 0x2ff0 && wc < 0x3000)
182 c = gb18030ext_page2f[wc-0x2ff0];
183 else if (wc == 0x303e)
184 c = 0xa989;
185 else if (wc >= 0x3440 && wc < 0x3478)
186 c = gb18030ext_page34[wc-0x3440];
187 else if (wc == 0x359e)
188 c = 0xfe5a;
189 else if (wc >= 0x3608 && wc < 0x3620)
190 c = gb18030ext_page36[wc-0x3608];
191 else if (wc == 0x3918)
192 c = 0xfe60;
193 else if (wc == 0x396e)
194 c = 0xfe5f;
195 else if (wc >= 0x39c8 && wc < 0x39e0)
196 c = gb18030ext_page39[wc-0x39c8];
197 else if (wc == 0x3a73)
198 c = 0xfe64;
199 else if (wc == 0x3b4e)
200 c = 0xfe68;
201 else if (wc == 0x3c6e)
202 c = 0xfe69;
203 else if (wc == 0x3ce0)
204 c = 0xfe6a;
205 else if (wc == 0x4056)
206 c = 0xfe6f;
207 else if (wc == 0x415f)
208 c = 0xfe70;
209 else if (wc == 0x4337)
210 c = 0xfe72;
211 else if (wc >= 0x43a8 && wc < 0x43e0)
212 c = gb18030ext_page43[wc-0x43a8];
213 else if (wc == 0x44d6)
214 c = 0xfe7b;
215 else if (wc >= 0x4648 && wc < 0x4668)
216 c = gb18030ext_page46[wc-0x4648];
217 else if (wc >= 0x4720 && wc < 0x4730)
218 c = gb18030ext_page47_1[wc-0x4720];
219 else if (wc >= 0x4778 && wc < 0x4790)
220 c = gb18030ext_page47_2[wc-0x4778];
221 else if (wc >= 0x4940 && wc < 0x49b8)
222 c = gb18030ext_page49[wc-0x4940];
223 else if (wc >= 0x4c70 && wc < 0x4ca8)
224 c = gb18030ext_page4c[wc-0x4c70];
225 else if (wc >= 0x4d10 && wc < 0x4d20)
226 c = gb18030ext_page4d[wc-0x4d10];
227 else if (wc == 0x4dae)
228 c = 0xfe9f;
229 if (c != 0) {
230 r[0] = (c >> 8); r[1] = (c & 0xff);
231 return 2;
233 return RET_ILSEQ;
235 return RET_TOOSMALL;