2 * Copyright (C) 1999-2001, 2005 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
22 * GB18030 four-byte extension
25 static const unsigned short gb18030uni_charset2uni_ranges
[412] = {
26 0x0000, 0x0023, 0x0024, 0x0025, 0x0026, 0x002c, 0x002d, 0x0031,
27 0x0032, 0x0050, 0x0051, 0x0058, 0x0059, 0x005e, 0x005f, 0x005f,
28 0x0060, 0x0063, 0x0064, 0x0066, 0x0067, 0x0067, 0x0068, 0x0068,
29 0x0069, 0x006c, 0x006d, 0x007d, 0x007e, 0x0084, 0x0085, 0x0093,
30 0x0094, 0x00ab, 0x00ac, 0x00ae, 0x00af, 0x00b2, 0x00b3, 0x00cf,
31 0x00d0, 0x0131, 0x0132, 0x0132, 0x0133, 0x0133, 0x0134, 0x0134,
32 0x0135, 0x0135, 0x0136, 0x0136, 0x0137, 0x0137, 0x0138, 0x0138,
33 0x0139, 0x0154, 0x0155, 0x01ab, 0x01ac, 0x01ba, 0x01bb, 0x021f,
34 0x0220, 0x0220, 0x0221, 0x022d, 0x022e, 0x02e4, 0x02e5, 0x02e5,
35 0x02e6, 0x02ec, 0x02ed, 0x02ed, 0x02ee, 0x0324, 0x0325, 0x0332,
36 0x0333, 0x0333, 0x0334, 0x1ef1, 0x1ef2, 0x1ef3, 0x1ef4, 0x1ef4,
37 0x1ef5, 0x1ef6, 0x1ef7, 0x1efd, 0x1efe, 0x1f06, 0x1f07, 0x1f07,
38 0x1f08, 0x1f08, 0x1f09, 0x1f0d, 0x1f0e, 0x1f7d, 0x1f7e, 0x1fd3,
39 0x1fd4, 0x1fd4, 0x1fd5, 0x1fd7, 0x1fd8, 0x1fe3, 0x1fe4, 0x1fed,
40 0x1fee, 0x202b, 0x202c, 0x202f, 0x2030, 0x2045, 0x2046, 0x2047,
41 0x2048, 0x20b5, 0x20b6, 0x20bb, 0x20bc, 0x20bc, 0x20bd, 0x20bf,
42 0x20c0, 0x20c3, 0x20c4, 0x20c5, 0x20c6, 0x20c7, 0x20c8, 0x20c8,
43 0x20c9, 0x20c9, 0x20ca, 0x20cb, 0x20cc, 0x20d0, 0x20d1, 0x20d5,
44 0x20d6, 0x20df, 0x20e0, 0x20e2, 0x20e3, 0x20e7, 0x20e8, 0x20f4,
45 0x20f5, 0x20f6, 0x20f7, 0x20fc, 0x20fd, 0x2121, 0x2122, 0x2124,
46 0x2125, 0x212f, 0x2130, 0x2148, 0x2149, 0x219a, 0x219b, 0x22e7,
47 0x22e8, 0x22f1, 0x22f2, 0x2355, 0x2356, 0x2359, 0x235a, 0x2366,
48 0x2367, 0x2369, 0x236a, 0x2373, 0x2374, 0x2383, 0x2384, 0x238b,
49 0x238c, 0x2393, 0x2394, 0x2396, 0x2397, 0x2398, 0x2399, 0x23aa,
50 0x23ab, 0x23c9, 0x23ca, 0x23cb, 0x23cc, 0x2401, 0x2402, 0x2402,
51 0x2403, 0x2c40, 0x2c41, 0x2c42, 0x2c43, 0x2c45, 0x2c46, 0x2c47,
52 0x2c48, 0x2c51, 0x2c52, 0x2c60, 0x2c61, 0x2c62, 0x2c63, 0x2c65,
53 0x2c66, 0x2c69, 0x2c6a, 0x2c6b, 0x2c6c, 0x2c6e, 0x2c6f, 0x2c7c,
54 0x2c7d, 0x2da1, 0x2da2, 0x2da5, 0x2da6, 0x2da6, 0x2da7, 0x2dab,
55 0x2dac, 0x2dad, 0x2dae, 0x2dc1, 0x2dc2, 0x2dc3, 0x2dc4, 0x2dca,
56 0x2dcb, 0x2dcc, 0x2dcd, 0x2dd1, 0x2dd2, 0x2dd7, 0x2dd8, 0x2ecd,
57 0x2ece, 0x2ed4, 0x2ed5, 0x2f45, 0x2f46, 0x302f, 0x3030, 0x303b,
58 0x303c, 0x303d, 0x303e, 0x305f, 0x3060, 0x3068, 0x3069, 0x306a,
59 0x306b, 0x306c, 0x306d, 0x30dd, 0x30de, 0x3108, 0x3109, 0x3232,
60 0x3233, 0x32a1, 0x32a2, 0x32ac, 0x32ad, 0x35a9, 0x35aa, 0x35fe,
61 0x35ff, 0x365e, 0x365f, 0x366c, 0x366d, 0x36ff, 0x3700, 0x37d9,
62 0x37da, 0x38f8, 0x38f9, 0x3969, 0x396a, 0x3cde, 0x3cdf, 0x3de6,
63 0x3de7, 0x3fbd, 0x3fbe, 0x4031, 0x4032, 0x4035, 0x4036, 0x4060,
64 0x4061, 0x4158, 0x4159, 0x42cd, 0x42ce, 0x42e1, 0x42e2, 0x43a2,
65 0x43a3, 0x43a7, 0x43a8, 0x43f9, 0x43fa, 0x4409, 0x440a, 0x45c2,
66 0x45c3, 0x45f4, 0x45f5, 0x45f6, 0x45f7, 0x45fa, 0x45fb, 0x45fb,
67 0x45fc, 0x460f, 0x4610, 0x4612, 0x4613, 0x4628, 0x4629, 0x48e7,
68 0x48e8, 0x490e, 0x490f, 0x497d, 0x497e, 0x4a11, 0x4a12, 0x4a62,
70 0x82bd, 0x82bd, 0x82be, 0x82be, 0x82bf, 0x82cb,
71 0x82cc, 0x82cc, 0x82cd, 0x82d1, 0x82d2, 0x82d8, 0x82d9, 0x82dc,
72 0x82dd, 0x82e0, 0x82e1, 0x82e8, 0x82e9, 0x82ef, 0x82f0, 0x82ff,
74 0x830e, 0x93d4, 0x93d5, 0x9420, 0x9421, 0x943b,
75 0x943c, 0x948c, 0x948d, 0x9495, 0x9496, 0x94af, 0x94b0, 0x94b0,
76 0x94b1, 0x94b1, 0x94b2, 0x94b4, 0x94b5, 0x94ba, 0x94bb, 0x94bb,
77 0x94bc, 0x94bd, 0x94be, 0x98c3, 0x98c4, 0x98c4, 0x98c5, 0x98c8,
78 0x98c9, 0x98c9, 0x98ca, 0x98ca, 0x98cb, 0x98cb, 0x98cc, 0x9960,
79 0x9961, 0x99e1, 0x99e2, 0x99fb
82 static const unsigned short gb18030uni_uni2charset_ranges
[412] = {
83 0x0080, 0x00a3, 0x00a5, 0x00a6, 0x00a9, 0x00af, 0x00b2, 0x00b6,
84 0x00b8, 0x00d6, 0x00d8, 0x00df, 0x00e2, 0x00e7, 0x00eb, 0x00eb,
85 0x00ee, 0x00f1, 0x00f4, 0x00f6, 0x00f8, 0x00f8, 0x00fb, 0x00fb,
86 0x00fd, 0x0100, 0x0102, 0x0112, 0x0114, 0x011a, 0x011c, 0x012a,
87 0x012c, 0x0143, 0x0145, 0x0147, 0x0149, 0x014c, 0x014e, 0x016a,
88 0x016c, 0x01cd, 0x01cf, 0x01cf, 0x01d1, 0x01d1, 0x01d3, 0x01d3,
89 0x01d5, 0x01d5, 0x01d7, 0x01d7, 0x01d9, 0x01d9, 0x01db, 0x01db,
90 0x01dd, 0x01f8, 0x01fa, 0x0250, 0x0252, 0x0260, 0x0262, 0x02c6,
91 0x02c8, 0x02c8, 0x02cc, 0x02d8, 0x02da, 0x0390, 0x03a2, 0x03a2,
92 0x03aa, 0x03b0, 0x03c2, 0x03c2, 0x03ca, 0x0400, 0x0402, 0x040f,
93 0x0450, 0x0450, 0x0452, 0x200f, 0x2011, 0x2012, 0x2017, 0x2017,
94 0x201a, 0x201b, 0x201e, 0x2024, 0x2027, 0x202f, 0x2031, 0x2031,
95 0x2034, 0x2034, 0x2036, 0x203a, 0x203c, 0x20ab, 0x20ad, 0x2102,
96 0x2104, 0x2104, 0x2106, 0x2108, 0x210a, 0x2115, 0x2117, 0x2120,
97 0x2122, 0x215f, 0x216c, 0x216f, 0x217a, 0x218f, 0x2194, 0x2195,
98 0x219a, 0x2207, 0x2209, 0x220e, 0x2210, 0x2210, 0x2212, 0x2214,
99 0x2216, 0x2219, 0x221b, 0x221c, 0x2221, 0x2222, 0x2224, 0x2224,
100 0x2226, 0x2226, 0x222c, 0x222d, 0x222f, 0x2233, 0x2238, 0x223c,
101 0x223e, 0x2247, 0x2249, 0x224b, 0x224d, 0x2251, 0x2253, 0x225f,
102 0x2262, 0x2263, 0x2268, 0x226d, 0x2270, 0x2294, 0x2296, 0x2298,
103 0x229a, 0x22a4, 0x22a6, 0x22be, 0x22c0, 0x2311, 0x2313, 0x245f,
104 0x246a, 0x2473, 0x249c, 0x24ff, 0x254c, 0x254f, 0x2574, 0x2580,
105 0x2590, 0x2592, 0x2596, 0x259f, 0x25a2, 0x25b1, 0x25b4, 0x25bb,
106 0x25be, 0x25c5, 0x25c8, 0x25ca, 0x25cc, 0x25cd, 0x25d0, 0x25e1,
107 0x25e6, 0x2604, 0x2607, 0x2608, 0x260a, 0x263f, 0x2641, 0x2641,
108 0x2643, 0x2e80, 0x2e82, 0x2e83, 0x2e85, 0x2e87, 0x2e89, 0x2e8a,
109 0x2e8d, 0x2e96, 0x2e98, 0x2ea6, 0x2ea8, 0x2ea9, 0x2eab, 0x2ead,
110 0x2eaf, 0x2eb2, 0x2eb4, 0x2eb5, 0x2eb8, 0x2eba, 0x2ebc, 0x2ec9,
111 0x2ecb, 0x2fef, 0x2ffc, 0x2fff, 0x3004, 0x3004, 0x3018, 0x301c,
112 0x301f, 0x3020, 0x302a, 0x303d, 0x303f, 0x3040, 0x3094, 0x309a,
113 0x309f, 0x30a0, 0x30f7, 0x30fb, 0x30ff, 0x3104, 0x312a, 0x321f,
114 0x322a, 0x3230, 0x3232, 0x32a2, 0x32a4, 0x338d, 0x3390, 0x339b,
115 0x339f, 0x33a0, 0x33a2, 0x33c3, 0x33c5, 0x33cd, 0x33cf, 0x33d0,
116 0x33d3, 0x33d4, 0x33d6, 0x3446, 0x3448, 0x3472, 0x3474, 0x359d,
117 0x359f, 0x360d, 0x360f, 0x3619, 0x361b, 0x3917, 0x3919, 0x396d,
118 0x396f, 0x39ce, 0x39d1, 0x39de, 0x39e0, 0x3a72, 0x3a74, 0x3b4d,
119 0x3b4f, 0x3c6d, 0x3c6f, 0x3cdf, 0x3ce1, 0x4055, 0x4057, 0x415e,
120 0x4160, 0x4336, 0x4338, 0x43ab, 0x43ad, 0x43b0, 0x43b2, 0x43dc,
121 0x43de, 0x44d5, 0x44d7, 0x464b, 0x464d, 0x4660, 0x4662, 0x4722,
122 0x4724, 0x4728, 0x472a, 0x477b, 0x477d, 0x478c, 0x478e, 0x4946,
123 0x4948, 0x4979, 0x497b, 0x497c, 0x497e, 0x4981, 0x4984, 0x4984,
124 0x4987, 0x499a, 0x499c, 0x499e, 0x49a0, 0x49b5, 0x49b8, 0x4c76,
125 0x4c78, 0x4c9e, 0x4ca4, 0x4d12, 0x4d1a, 0x4dad, 0x4daf, 0x4dff,
127 0xe76c, 0xe76c, 0xe7c8, 0xe7c8, 0xe7e7, 0xe7f3,
128 0xe815, 0xe815, 0xe819, 0xe81d, 0xe81f, 0xe825, 0xe827, 0xe82a,
129 0xe82d, 0xe830, 0xe833, 0xe83a, 0xe83c, 0xe842, 0xe844, 0xe853,
131 0xe865, 0xf92b, 0xf92d, 0xf978, 0xf97a, 0xf994,
132 0xf996, 0xf9e6, 0xf9e8, 0xf9f0, 0xf9f2, 0xfa0b, 0xfa10, 0xfa10,
133 0xfa12, 0xfa12, 0xfa15, 0xfa17, 0xfa19, 0xfa1e, 0xfa22, 0xfa22,
134 0xfa25, 0xfa26, 0xfa2a, 0xfe2f, 0xfe32, 0xfe32, 0xfe45, 0xfe48,
135 0xfe53, 0xfe53, 0xfe58, 0xfe58, 0xfe67, 0xfe67, 0xfe6c, 0xff00,
136 0xff5f, 0xffdf, 0xffe6, 0xffff
139 static const unsigned short gb18030uni_ranges
[206] = {
140 128, 129, 131, 133, 134, 135, 137, 140,
141 142, 144, 145, 147, 148, 149, 150, 151,
142 152, 153, 154, 155, 156, 157, 158, 159,
143 160, 161, 162, 163, 164, 165, 166, 167,
144 168, 171, 172, 189, 196, 213, 220, 221,
145 285, 286, 287, 291, 293, 295, 297, 298,
146 300, 301, 302, 303, 304, 305, 306, 307,
147 308, 320, 330, 334, 338, 339, 340, 341,
148 342, 343, 347, 348, 349, 354, 355, 359,
149 360, 361, 362, 363, 365, 369, 371, 372,
150 373, 374, 375, 376, 386, 426, 502, 538,
151 553, 556, 558, 560, 562, 564, 565, 567,
152 571, 573, 574, 575, 576, 577, 578, 579,
153 581, 582, 583, 584, 585, 586, 588, 589,
154 590, 602, 606, 625, 627, 636, 637, 720,
155 724, 810, 813, 850, 860, 861, 862, 864,
156 867, 868, 869, 870, 872, 873, 874, 875,
157 876, 877, 878, 879, 880, 882, 883, 884,
158 885, 886, 887, 888, 889, 890, 891, 892,
159 893, 894, 895, 896, 897, 898, 899, 900,
160 901, 902, 903, 905, 907, 908, 909, 911,
161 912, 917, 924, 925, 21827,
163 25929, 25932, 25933, 25934, 25936, 25938, 25939, 25940,
165 25943, 25944, 25945, 25946, 25947, 25948, 25952,
166 25953, 25955, 25956, 25959, 25961, 25964, 25966, 25984,
167 25994, 25998, 26012, 26016, 26110, 26116
171 gb18030uni_mbtowc (conv_t conv
, ucs4_t
*pwc
, const unsigned char *s
, int n
)
173 unsigned char c1
= s
[0];
174 if (c1
>= 0x81 && c1
<= 0x84) {
176 unsigned char c2
= s
[1];
177 if (c2
>= 0x30 && c2
<= 0x39) {
179 unsigned char c3
= s
[2];
180 if (c3
>= 0x81 && c3
<= 0xfe) {
182 unsigned char c4
= s
[3];
183 if (c4
>= 0x30 && c4
<= 0x39) {
184 unsigned int i
= (((c1
- 0x81) * 10 + (c2
- 0x30)) * 126 + (c3
- 0x81)) * 10 + (c4
- 0x30);
185 if (i
>= 0 && i
<= 39419) {
187 unsigned int k2
= 205;
189 unsigned int k
= (k1
+ k2
) / 2;
190 if (i
<= gb18030uni_charset2uni_ranges
[2*k
+1])
192 else if (i
>= gb18030uni_charset2uni_ranges
[2*k
+2])
198 unsigned int diff
= gb18030uni_ranges
[k1
];
199 *pwc
= (ucs4_t
) (i
+ diff
);
206 return RET_TOOFEW(0);
210 return RET_TOOFEW(0);
214 return RET_TOOFEW(0);
220 gb18030uni_wctomb (conv_t conv
, unsigned char *r
, ucs4_t wc
, int n
)
224 if (i
>= 0x0080 && i
<= 0xffff) {
226 unsigned int k2
= 205;
228 unsigned int k
= (k1
+ k2
) / 2;
229 if (i
<= gb18030uni_uni2charset_ranges
[2*k
+1])
231 else if (i
>= gb18030uni_uni2charset_ranges
[2*k
+2])
237 unsigned int diff
= gb18030uni_ranges
[k1
];
239 r
[3] = (i
% 10) + 0x30; i
= i
/ 10;
240 r
[2] = (i
% 126) + 0x81; i
= i
/ 126;
241 r
[1] = (i
% 10) + 0x30; i
= i
/ 10;