2 * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <http://www.gnu.org/licenses/>.
24 /* Specification: RFC 1922 */
31 * The state is composed of one of the following values
/* Value of the lowest state byte (state & 0xff): a two-byte character set
   is currently shifted in, as opposed to STATE_ASCII. */
34 #define STATE_TWOBYTE 1
36 * and one of the following values, << 8
/* Values of the second state byte ((state >> 8) & 0xff).  They select which
   decoder handles a two-byte sequence: gb2312_mbtowc, cns11643_1_mbtowc or
   isoir165_mbtowc respectively. */
39 #define STATE2_DESIGNATED_GB2312 1
40 #define STATE2_DESIGNATED_CNS11643_1 2
41 #define STATE2_DESIGNATED_ISO_IR_165 3
43 * and one of the following values, << 16
/* Value of the third state byte ((state >> 16) & 0xff): CNS 11643 plane 2
   is designated, so cns11643_2_mbtowc is used for the affected bytes. */
46 #define STATE3_DESIGNATED_CNS11643_2 1
48 * and one of the following values, << 24
/* Values of the top state byte (state >> 24): one of CNS 11643 planes 3..7
   is designated; the matching cns11643_N_mbtowc decoder is used. */
51 #define STATE4_DESIGNATED_CNS11643_3 1
52 #define STATE4_DESIGNATED_CNS11643_4 2
53 #define STATE4_DESIGNATED_CNS11643_5 3
54 #define STATE4_DESIGNATED_CNS11643_6 4
55 #define STATE4_DESIGNATED_CNS11643_7 5
58 unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = (state >> 16) & 0xff, state4 = state >> 24
/* Repack the four 8-bit fields into the single `state` word: state1 in
   bits 0..7, state2 in bits 8..15, state3 in bits 16..23 and state4 in
   bits 24 and up.  Expands to an assignment without a trailing semicolon
   and expects `state`, `state1` .. `state4` to be in scope at the point
   of use. */
59 #define COMBINE_STATE \
60 state = (state4 << 24) | (state3 << 16) | (state2 << 8) | state1
63 iso2022_cn_ext_mbtowc (conv_t conv
, ucs4_t
*pwc
, const unsigned char *s
, int n
)
65 state_t state
= conv
->istate
;
77 state2
= STATE2_DESIGNATED_GB2312
;
84 state2
= STATE2_DESIGNATED_CNS11643_1
;
91 state2
= STATE2_DESIGNATED_ISO_IR_165
;
100 state3
= STATE3_DESIGNATED_CNS11643_2
;
109 state4
= STATE4_DESIGNATED_CNS11643_3
;
116 state4
= STATE4_DESIGNATED_CNS11643_4
;
123 state4
= STATE4_DESIGNATED_CNS11643_5
;
130 state4
= STATE4_DESIGNATED_CNS11643_6
;
137 state4
= STATE4_DESIGNATED_CNS11643_7
;
149 case STATE3_DESIGNATED_CNS11643_2
:
150 if (s
[2] < 0x80 && s
[3] < 0x80) {
151 int ret
= cns11643_2_mbtowc(conv
,pwc
,s
+2,2);
152 if (ret
== RET_ILSEQ
)
154 if (ret
!= 2) abort();
156 conv
->istate
= state
;
167 case STATE4_DESIGNATED_CNS11643_3
:
168 if (s
[2] < 0x80 && s
[3] < 0x80) {
169 int ret
= cns11643_3_mbtowc(conv
,pwc
,s
+2,2);
170 if (ret
== RET_ILSEQ
)
172 if (ret
!= 2) abort();
174 conv
->istate
= state
;
178 case STATE4_DESIGNATED_CNS11643_4
:
179 if (s
[2] < 0x80 && s
[3] < 0x80) {
180 int ret
= cns11643_4_mbtowc(conv
,pwc
,s
+2,2);
181 if (ret
== RET_ILSEQ
)
183 if (ret
!= 2) abort();
185 conv
->istate
= state
;
189 case STATE4_DESIGNATED_CNS11643_5
:
190 if (s
[2] < 0x80 && s
[3] < 0x80) {
191 int ret
= cns11643_5_mbtowc(conv
,pwc
,s
+2,2);
192 if (ret
== RET_ILSEQ
)
194 if (ret
!= 2) abort();
196 conv
->istate
= state
;
200 case STATE4_DESIGNATED_CNS11643_6
:
201 if (s
[2] < 0x80 && s
[3] < 0x80) {
202 int ret
= cns11643_6_mbtowc(conv
,pwc
,s
+2,2);
203 if (ret
== RET_ILSEQ
)
205 if (ret
!= 2) abort();
207 conv
->istate
= state
;
211 case STATE4_DESIGNATED_CNS11643_7
:
212 if (s
[2] < 0x80 && s
[3] < 0x80) {
213 int ret
= cns11643_7_mbtowc(conv
,pwc
,s
+2,2);
214 if (ret
== RET_ILSEQ
)
216 if (ret
!= 2) abort();
218 conv
->istate
= state
;
228 if (state2
!= STATE2_DESIGNATED_GB2312
&& state2
!= STATE2_DESIGNATED_CNS11643_1
&& state2
!= STATE2_DESIGNATED_ISO_IR_165
)
230 state1
= STATE_TWOBYTE
;
237 state1
= STATE_ASCII
;
248 int ret
= ascii_mbtowc(conv
,pwc
,s
,1);
249 if (ret
== RET_ILSEQ
)
251 if (ret
!= 1) abort();
252 if (*pwc
== 0x000a || *pwc
== 0x000d) {
253 state2
= STATE2_NONE
; state3
= STATE3_NONE
; state4
= STATE3_NONE
;
256 conv
->istate
= state
;
263 if (s
[0] < 0x80 && s
[1] < 0x80) {
268 case STATE2_DESIGNATED_GB2312
:
269 ret
= gb2312_mbtowc(conv
,pwc
,s
,2); break;
270 case STATE2_DESIGNATED_CNS11643_1
:
271 ret
= cns11643_1_mbtowc(conv
,pwc
,s
,2); break;
272 case STATE2_DESIGNATED_ISO_IR_165
:
273 ret
= isoir165_mbtowc(conv
,pwc
,s
,2); break;
276 if (ret
== RET_ILSEQ
)
278 if (ret
!= 2) abort();
280 conv
->istate
= state
;
289 conv
->istate
= state
;
290 return RET_TOOFEW(count
);
294 conv
->istate
= state
;
295 return RET_SHIFT_ILSEQ(count
);
299 iso2022_cn_ext_wctomb (conv_t conv
, unsigned char *r
, ucs4_t wc
, int n
)
301 state_t state
= conv
->ostate
;
303 unsigned char buf
[3];
306 /* There is no need to handle Unicode 3.1 tag characters and to look for
307 "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
310 ret
= ascii_wctomb(conv
,buf
,wc
,1);
311 if (ret
!= RET_ILUNI
) {
312 if (ret
!= 1) abort();
314 int count
= (state1
== STATE_ASCII
? 1 : 2);
317 if (state1
!= STATE_ASCII
) {
320 state1
= STATE_ASCII
;
323 if (wc
== 0x000a || wc
== 0x000d) {
324 state2
= STATE2_NONE
; state3
= STATE3_NONE
; state4
= STATE3_NONE
;
327 conv
->ostate
= state
;
332 /* Try GB 2312-1980. */
333 ret
= gb2312_wctomb(conv
,buf
,wc
,2);
334 if (ret
!= RET_ILUNI
) {
335 if (ret
!= 2) abort();
336 if (buf
[0] < 0x80 && buf
[1] < 0x80) {
337 int count
= (state2
== STATE2_DESIGNATED_GB2312
? 0 : 4) + (state1
== STATE_TWOBYTE
? 0 : 1) + 2;
340 if (state2
!= STATE2_DESIGNATED_GB2312
) {
346 state2
= STATE2_DESIGNATED_GB2312
;
348 if (state1
!= STATE_TWOBYTE
) {
351 state1
= STATE_TWOBYTE
;
356 conv
->ostate
= state
;
361 ret
= cns11643_wctomb(conv
,buf
,wc
,3);
362 if (ret
!= RET_ILUNI
) {
363 if (ret
!= 3) abort();
365 /* Try CNS 11643-1992 Plane 1. */
366 if (buf
[0] == 1 && buf
[1] < 0x80 && buf
[2] < 0x80) {
367 int count
= (state2
== STATE2_DESIGNATED_CNS11643_1
? 0 : 4) + (state1
== STATE_TWOBYTE
? 0 : 1) + 2;
370 if (state2
!= STATE2_DESIGNATED_CNS11643_1
) {
376 state2
= STATE2_DESIGNATED_CNS11643_1
;
378 if (state1
!= STATE_TWOBYTE
) {
381 state1
= STATE_TWOBYTE
;
386 conv
->ostate
= state
;
390 /* Try CNS 11643-1992 Plane 2. */
391 if (buf
[0] == 2 && buf
[1] < 0x80 && buf
[2] < 0x80) {
392 int count
= (state3
== STATE3_DESIGNATED_CNS11643_2
? 0 : 4) + 4;
395 if (state3
!= STATE3_DESIGNATED_CNS11643_2
) {
401 state3
= STATE3_DESIGNATED_CNS11643_2
;
408 conv
->ostate
= state
;
412 /* Try CNS 11643-1992 Plane 3. */
413 if (buf
[0] == 3 && buf
[1] < 0x80 && buf
[2] < 0x80) {
414 int count
= (state4
== STATE4_DESIGNATED_CNS11643_3
? 0 : 4) + 4;
417 if (state4
!= STATE4_DESIGNATED_CNS11643_3
) {
423 state4
= STATE4_DESIGNATED_CNS11643_3
;
430 conv
->ostate
= state
;
434 /* Try CNS 11643-1992 Plane 4. */
435 if (buf
[0] == 4 && buf
[1] < 0x80 && buf
[2] < 0x80) {
436 int count
= (state4
== STATE4_DESIGNATED_CNS11643_4
? 0 : 4) + 4;
439 if (state4
!= STATE4_DESIGNATED_CNS11643_4
) {
445 state4
= STATE4_DESIGNATED_CNS11643_4
;
452 conv
->ostate
= state
;
456 /* Try CNS 11643-1992 Plane 5. */
457 if (buf
[0] == 5 && buf
[1] < 0x80 && buf
[2] < 0x80) {
458 int count
= (state4
== STATE4_DESIGNATED_CNS11643_5
? 0 : 4) + 4;
461 if (state4
!= STATE4_DESIGNATED_CNS11643_5
) {
467 state4
= STATE4_DESIGNATED_CNS11643_5
;
474 conv
->ostate
= state
;
478 /* Try CNS 11643-1992 Plane 6. */
479 if (buf
[0] == 6 && buf
[1] < 0x80 && buf
[2] < 0x80) {
480 int count
= (state4
== STATE4_DESIGNATED_CNS11643_6
? 0 : 4) + 4;
483 if (state4
!= STATE4_DESIGNATED_CNS11643_6
) {
489 state4
= STATE4_DESIGNATED_CNS11643_6
;
496 conv
->ostate
= state
;
500 /* Try CNS 11643-1992 Plane 7. */
501 if (buf
[0] == 7 && buf
[1] < 0x80 && buf
[2] < 0x80) {
502 int count
= (state4
== STATE4_DESIGNATED_CNS11643_7
? 0 : 4) + 4;
505 if (state4
!= STATE4_DESIGNATED_CNS11643_7
) {
511 state4
= STATE4_DESIGNATED_CNS11643_7
;
518 conv
->ostate
= state
;
524 /* Try ISO-IR-165. */
525 ret
= isoir165_wctomb(conv
,buf
,wc
,2);
526 if (ret
!= RET_ILUNI
) {
527 if (ret
!= 2) abort();
528 if (buf
[0] < 0x80 && buf
[1] < 0x80) {
529 int count
= (state2
== STATE2_DESIGNATED_ISO_IR_165
? 0 : 4) + (state1
== STATE_TWOBYTE
? 0 : 1) + 2;
532 if (state2
!= STATE2_DESIGNATED_ISO_IR_165
) {
538 state2
= STATE2_DESIGNATED_ISO_IR_165
;
540 if (state1
!= STATE_TWOBYTE
) {
543 state1
= STATE_TWOBYTE
;
548 conv
->ostate
= state
;
557 iso2022_cn_ext_reset (conv_t conv
, unsigned char *r
, int n
)
559 state_t state
= conv
->ostate
;
564 if (state1
!= STATE_ASCII
) {
568 /* conv->ostate = 0; will be done by the caller */
/* Undefine the STATE4_*, STATE3_* and STATE2_* designation macros that
   were defined near the top of this file, in reverse order of their
   definition. */
576 #undef STATE4_DESIGNATED_CNS11643_7
577 #undef STATE4_DESIGNATED_CNS11643_6
578 #undef STATE4_DESIGNATED_CNS11643_5
579 #undef STATE4_DESIGNATED_CNS11643_4
580 #undef STATE4_DESIGNATED_CNS11643_3
582 #undef STATE3_DESIGNATED_CNS11643_2
584 #undef STATE2_DESIGNATED_ISO_IR_165
585 #undef STATE2_DESIGNATED_CNS11643_1
586 #undef STATE2_DESIGNATED_GB2312