Replace FSF snail-mail address with URL.
[libiconv.git] / lib / iso2022_cnext.h
blob9d8144af7144d642ef57772ed7218de287f5f8db
/*
 * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * ISO-2022-CN-EXT
 */

/* Specification: RFC 1922 */
/* Control bytes that introduce escape sequences and shifts. */
#define ESC 0x1b
#define SO  0x0e
#define SI  0x0f

/*
 * The conversion state is one unsigned word made of four 8-bit fields.
 *
 * Bits 0..7 (state1): the active shift state, switched by SI and SO.
 */
#define STATE_ASCII 0
#define STATE_TWOBYTE 1
/*
 * Bits 8..15 (state2): the character set designated by ESC $ ) <final>,
 * which is the one used while the shift state is STATE_TWOBYTE.
 */
#define STATE2_NONE 0
#define STATE2_DESIGNATED_GB2312 1
#define STATE2_DESIGNATED_CNS11643_1 2
#define STATE2_DESIGNATED_ISO_IR_165 3
/*
 * Bits 16..23 (state3): the character set designated by ESC $ * <final>,
 * used for characters introduced by ESC N.
 */
#define STATE3_NONE 0
#define STATE3_DESIGNATED_CNS11643_2 1
/*
 * Bits 24..31 (state4): the character set designated by ESC $ + <final>,
 * used for characters introduced by ESC O.
 */
#define STATE4_NONE 0
#define STATE4_DESIGNATED_CNS11643_3 1
#define STATE4_DESIGNATED_CNS11643_4 2
#define STATE4_DESIGNATED_CNS11643_5 3
#define STATE4_DESIGNATED_CNS11643_6 4
#define STATE4_DESIGNATED_CNS11643_7 5

/*
 * SPLIT_STATE declares the locals state1..state4 from a variable named
 * `state` that must be in scope; COMBINE_STATE packs them back into it.
 * SPLIT_STATE expands to a declaration, so it has to appear where a
 * declaration is allowed.
 */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = (state >> 16) & 0xff, state4 = state >> 24
#define COMBINE_STATE \
  state = (state4 << 24) | (state3 << 16) | (state2 << 8) | state1
62 static int
63 iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
65 state_t state = conv->istate;
66 SPLIT_STATE;
67 int count = 0;
68 unsigned char c;
69 for (;;) {
70 c = *s;
71 if (c == ESC) {
72 if (n < count+4)
73 goto none;
74 if (s[1] == '$') {
75 if (s[2] == ')') {
76 if (s[3] == 'A') {
77 state2 = STATE2_DESIGNATED_GB2312;
78 s += 4; count += 4;
79 if (n < count+1)
80 goto none;
81 continue;
83 if (s[3] == 'G') {
84 state2 = STATE2_DESIGNATED_CNS11643_1;
85 s += 4; count += 4;
86 if (n < count+1)
87 goto none;
88 continue;
90 if (s[3] == 'E') {
91 state2 = STATE2_DESIGNATED_ISO_IR_165;
92 s += 4; count += 4;
93 if (n < count+1)
94 goto none;
95 continue;
98 if (s[2] == '*') {
99 if (s[3] == 'H') {
100 state3 = STATE3_DESIGNATED_CNS11643_2;
101 s += 4; count += 4;
102 if (n < count+1)
103 goto none;
104 continue;
107 if (s[2] == '+') {
108 if (s[3] == 'I') {
109 state4 = STATE4_DESIGNATED_CNS11643_3;
110 s += 4; count += 4;
111 if (n < count+1)
112 goto none;
113 continue;
115 if (s[3] == 'J') {
116 state4 = STATE4_DESIGNATED_CNS11643_4;
117 s += 4; count += 4;
118 if (n < count+1)
119 goto none;
120 continue;
122 if (s[3] == 'K') {
123 state4 = STATE4_DESIGNATED_CNS11643_5;
124 s += 4; count += 4;
125 if (n < count+1)
126 goto none;
127 continue;
129 if (s[3] == 'L') {
130 state4 = STATE4_DESIGNATED_CNS11643_6;
131 s += 4; count += 4;
132 if (n < count+1)
133 goto none;
134 continue;
136 if (s[3] == 'M') {
137 state4 = STATE4_DESIGNATED_CNS11643_7;
138 s += 4; count += 4;
139 if (n < count+1)
140 goto none;
141 continue;
145 if (s[1] == 'N') {
146 switch (state3) {
147 case STATE3_NONE:
148 goto ilseq;
149 case STATE3_DESIGNATED_CNS11643_2:
150 if (s[2] < 0x80 && s[3] < 0x80) {
151 int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
152 if (ret == RET_ILSEQ)
153 goto ilseq;
154 if (ret != 2) abort();
155 COMBINE_STATE;
156 conv->istate = state;
157 return count+4;
158 } else
159 goto ilseq;
160 default: abort();
163 if (s[1] == 'O') {
164 switch (state4) {
165 case STATE4_NONE:
166 goto ilseq;
167 case STATE4_DESIGNATED_CNS11643_3:
168 if (s[2] < 0x80 && s[3] < 0x80) {
169 int ret = cns11643_3_mbtowc(conv,pwc,s+2,2);
170 if (ret == RET_ILSEQ)
171 goto ilseq;
172 if (ret != 2) abort();
173 COMBINE_STATE;
174 conv->istate = state;
175 return count+4;
176 } else
177 goto ilseq;
178 case STATE4_DESIGNATED_CNS11643_4:
179 if (s[2] < 0x80 && s[3] < 0x80) {
180 int ret = cns11643_4_mbtowc(conv,pwc,s+2,2);
181 if (ret == RET_ILSEQ)
182 goto ilseq;
183 if (ret != 2) abort();
184 COMBINE_STATE;
185 conv->istate = state;
186 return count+4;
187 } else
188 goto ilseq;
189 case STATE4_DESIGNATED_CNS11643_5:
190 if (s[2] < 0x80 && s[3] < 0x80) {
191 int ret = cns11643_5_mbtowc(conv,pwc,s+2,2);
192 if (ret == RET_ILSEQ)
193 goto ilseq;
194 if (ret != 2) abort();
195 COMBINE_STATE;
196 conv->istate = state;
197 return count+4;
198 } else
199 goto ilseq;
200 case STATE4_DESIGNATED_CNS11643_6:
201 if (s[2] < 0x80 && s[3] < 0x80) {
202 int ret = cns11643_6_mbtowc(conv,pwc,s+2,2);
203 if (ret == RET_ILSEQ)
204 goto ilseq;
205 if (ret != 2) abort();
206 COMBINE_STATE;
207 conv->istate = state;
208 return count+4;
209 } else
210 goto ilseq;
211 case STATE4_DESIGNATED_CNS11643_7:
212 if (s[2] < 0x80 && s[3] < 0x80) {
213 int ret = cns11643_7_mbtowc(conv,pwc,s+2,2);
214 if (ret == RET_ILSEQ)
215 goto ilseq;
216 if (ret != 2) abort();
217 COMBINE_STATE;
218 conv->istate = state;
219 return count+4;
220 } else
221 goto ilseq;
222 default: abort();
225 goto ilseq;
227 if (c == SO) {
228 if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165)
229 goto ilseq;
230 state1 = STATE_TWOBYTE;
231 s++; count++;
232 if (n < count+1)
233 goto none;
234 continue;
236 if (c == SI) {
237 state1 = STATE_ASCII;
238 s++; count++;
239 if (n < count+1)
240 goto none;
241 continue;
243 break;
245 switch (state1) {
246 case STATE_ASCII:
247 if (c < 0x80) {
248 int ret = ascii_mbtowc(conv,pwc,s,1);
249 if (ret == RET_ILSEQ)
250 goto ilseq;
251 if (ret != 1) abort();
252 if (*pwc == 0x000a || *pwc == 0x000d) {
253 state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
255 COMBINE_STATE;
256 conv->istate = state;
257 return count+1;
258 } else
259 goto ilseq;
260 case STATE_TWOBYTE:
261 if (n < count+2)
262 goto none;
263 if (s[0] < 0x80 && s[1] < 0x80) {
264 int ret;
265 switch (state2) {
266 case STATE2_NONE:
267 goto ilseq;
268 case STATE2_DESIGNATED_GB2312:
269 ret = gb2312_mbtowc(conv,pwc,s,2); break;
270 case STATE2_DESIGNATED_CNS11643_1:
271 ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
272 case STATE2_DESIGNATED_ISO_IR_165:
273 ret = isoir165_mbtowc(conv,pwc,s,2); break;
274 default: abort();
276 if (ret == RET_ILSEQ)
277 goto ilseq;
278 if (ret != 2) abort();
279 COMBINE_STATE;
280 conv->istate = state;
281 return count+2;
282 } else
283 goto ilseq;
284 default: abort();
287 none:
288 COMBINE_STATE;
289 conv->istate = state;
290 return RET_TOOFEW(count);
292 ilseq:
293 COMBINE_STATE;
294 conv->istate = state;
295 return RET_SHIFT_ILSEQ(count);
298 static int
299 iso2022_cn_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
301 state_t state = conv->ostate;
302 SPLIT_STATE;
303 unsigned char buf[3];
304 int ret;
306 /* There is no need to handle Unicode 3.1 tag characters and to look for
307 "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
309 /* Try ASCII. */
310 ret = ascii_wctomb(conv,buf,wc,1);
311 if (ret != RET_ILUNI) {
312 if (ret != 1) abort();
313 if (buf[0] < 0x80) {
314 int count = (state1 == STATE_ASCII ? 1 : 2);
315 if (n < count)
316 return RET_TOOSMALL;
317 if (state1 != STATE_ASCII) {
318 r[0] = SI;
319 r += 1;
320 state1 = STATE_ASCII;
322 r[0] = buf[0];
323 if (wc == 0x000a || wc == 0x000d) {
324 state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
326 COMBINE_STATE;
327 conv->ostate = state;
328 return count;
332 /* Try GB 2312-1980. */
333 ret = gb2312_wctomb(conv,buf,wc,2);
334 if (ret != RET_ILUNI) {
335 if (ret != 2) abort();
336 if (buf[0] < 0x80 && buf[1] < 0x80) {
337 int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
338 if (n < count)
339 return RET_TOOSMALL;
340 if (state2 != STATE2_DESIGNATED_GB2312) {
341 r[0] = ESC;
342 r[1] = '$';
343 r[2] = ')';
344 r[3] = 'A';
345 r += 4;
346 state2 = STATE2_DESIGNATED_GB2312;
348 if (state1 != STATE_TWOBYTE) {
349 r[0] = SO;
350 r += 1;
351 state1 = STATE_TWOBYTE;
353 r[0] = buf[0];
354 r[1] = buf[1];
355 COMBINE_STATE;
356 conv->ostate = state;
357 return count;
361 ret = cns11643_wctomb(conv,buf,wc,3);
362 if (ret != RET_ILUNI) {
363 if (ret != 3) abort();
365 /* Try CNS 11643-1992 Plane 1. */
366 if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
367 int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
368 if (n < count)
369 return RET_TOOSMALL;
370 if (state2 != STATE2_DESIGNATED_CNS11643_1) {
371 r[0] = ESC;
372 r[1] = '$';
373 r[2] = ')';
374 r[3] = 'G';
375 r += 4;
376 state2 = STATE2_DESIGNATED_CNS11643_1;
378 if (state1 != STATE_TWOBYTE) {
379 r[0] = SO;
380 r += 1;
381 state1 = STATE_TWOBYTE;
383 r[0] = buf[1];
384 r[1] = buf[2];
385 COMBINE_STATE;
386 conv->ostate = state;
387 return count;
390 /* Try CNS 11643-1992 Plane 2. */
391 if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
392 int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
393 if (n < count)
394 return RET_TOOSMALL;
395 if (state3 != STATE3_DESIGNATED_CNS11643_2) {
396 r[0] = ESC;
397 r[1] = '$';
398 r[2] = '*';
399 r[3] = 'H';
400 r += 4;
401 state3 = STATE3_DESIGNATED_CNS11643_2;
403 r[0] = ESC;
404 r[1] = 'N';
405 r[2] = buf[1];
406 r[3] = buf[2];
407 COMBINE_STATE;
408 conv->ostate = state;
409 return count;
412 /* Try CNS 11643-1992 Plane 3. */
413 if (buf[0] == 3 && buf[1] < 0x80 && buf[2] < 0x80) {
414 int count = (state4 == STATE4_DESIGNATED_CNS11643_3 ? 0 : 4) + 4;
415 if (n < count)
416 return RET_TOOSMALL;
417 if (state4 != STATE4_DESIGNATED_CNS11643_3) {
418 r[0] = ESC;
419 r[1] = '$';
420 r[2] = '+';
421 r[3] = 'I';
422 r += 4;
423 state4 = STATE4_DESIGNATED_CNS11643_3;
425 r[0] = ESC;
426 r[1] = 'O';
427 r[2] = buf[1];
428 r[3] = buf[2];
429 COMBINE_STATE;
430 conv->ostate = state;
431 return count;
434 /* Try CNS 11643-1992 Plane 4. */
435 if (buf[0] == 4 && buf[1] < 0x80 && buf[2] < 0x80) {
436 int count = (state4 == STATE4_DESIGNATED_CNS11643_4 ? 0 : 4) + 4;
437 if (n < count)
438 return RET_TOOSMALL;
439 if (state4 != STATE4_DESIGNATED_CNS11643_4) {
440 r[0] = ESC;
441 r[1] = '$';
442 r[2] = '+';
443 r[3] = 'J';
444 r += 4;
445 state4 = STATE4_DESIGNATED_CNS11643_4;
447 r[0] = ESC;
448 r[1] = 'O';
449 r[2] = buf[1];
450 r[3] = buf[2];
451 COMBINE_STATE;
452 conv->ostate = state;
453 return count;
456 /* Try CNS 11643-1992 Plane 5. */
457 if (buf[0] == 5 && buf[1] < 0x80 && buf[2] < 0x80) {
458 int count = (state4 == STATE4_DESIGNATED_CNS11643_5 ? 0 : 4) + 4;
459 if (n < count)
460 return RET_TOOSMALL;
461 if (state4 != STATE4_DESIGNATED_CNS11643_5) {
462 r[0] = ESC;
463 r[1] = '$';
464 r[2] = '+';
465 r[3] = 'K';
466 r += 4;
467 state4 = STATE4_DESIGNATED_CNS11643_5;
469 r[0] = ESC;
470 r[1] = 'O';
471 r[2] = buf[1];
472 r[3] = buf[2];
473 COMBINE_STATE;
474 conv->ostate = state;
475 return count;
478 /* Try CNS 11643-1992 Plane 6. */
479 if (buf[0] == 6 && buf[1] < 0x80 && buf[2] < 0x80) {
480 int count = (state4 == STATE4_DESIGNATED_CNS11643_6 ? 0 : 4) + 4;
481 if (n < count)
482 return RET_TOOSMALL;
483 if (state4 != STATE4_DESIGNATED_CNS11643_6) {
484 r[0] = ESC;
485 r[1] = '$';
486 r[2] = '+';
487 r[3] = 'L';
488 r += 4;
489 state4 = STATE4_DESIGNATED_CNS11643_6;
491 r[0] = ESC;
492 r[1] = 'O';
493 r[2] = buf[1];
494 r[3] = buf[2];
495 COMBINE_STATE;
496 conv->ostate = state;
497 return count;
500 /* Try CNS 11643-1992 Plane 7. */
501 if (buf[0] == 7 && buf[1] < 0x80 && buf[2] < 0x80) {
502 int count = (state4 == STATE4_DESIGNATED_CNS11643_7 ? 0 : 4) + 4;
503 if (n < count)
504 return RET_TOOSMALL;
505 if (state4 != STATE4_DESIGNATED_CNS11643_7) {
506 r[0] = ESC;
507 r[1] = '$';
508 r[2] = '+';
509 r[3] = 'M';
510 r += 4;
511 state4 = STATE4_DESIGNATED_CNS11643_7;
513 r[0] = ESC;
514 r[1] = 'O';
515 r[2] = buf[1];
516 r[3] = buf[2];
517 COMBINE_STATE;
518 conv->ostate = state;
519 return count;
524 /* Try ISO-IR-165. */
525 ret = isoir165_wctomb(conv,buf,wc,2);
526 if (ret != RET_ILUNI) {
527 if (ret != 2) abort();
528 if (buf[0] < 0x80 && buf[1] < 0x80) {
529 int count = (state2 == STATE2_DESIGNATED_ISO_IR_165 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
530 if (n < count)
531 return RET_TOOSMALL;
532 if (state2 != STATE2_DESIGNATED_ISO_IR_165) {
533 r[0] = ESC;
534 r[1] = '$';
535 r[2] = ')';
536 r[3] = 'E';
537 r += 4;
538 state2 = STATE2_DESIGNATED_ISO_IR_165;
540 if (state1 != STATE_TWOBYTE) {
541 r[0] = SO;
542 r += 1;
543 state1 = STATE_TWOBYTE;
545 r[0] = buf[0];
546 r[1] = buf[1];
547 COMBINE_STATE;
548 conv->ostate = state;
549 return count;
553 return RET_ILUNI;
556 static int
557 iso2022_cn_ext_reset (conv_t conv, unsigned char *r, int n)
559 state_t state = conv->ostate;
560 SPLIT_STATE;
561 (void)state2;
562 (void)state3;
563 (void)state4;
564 if (state1 != STATE_ASCII) {
565 if (n < 1)
566 return RET_TOOSMALL;
567 r[0] = SI;
568 /* conv->ostate = 0; will be done by the caller */
569 return 1;
570 } else
571 return 0;
/* Drop the state helpers and constants so that they do not leak into
   whatever is compiled after this header. */
#undef COMBINE_STATE
#undef SPLIT_STATE

#undef STATE4_DESIGNATED_CNS11643_7
#undef STATE4_DESIGNATED_CNS11643_6
#undef STATE4_DESIGNATED_CNS11643_5
#undef STATE4_DESIGNATED_CNS11643_4
#undef STATE4_DESIGNATED_CNS11643_3
#undef STATE4_NONE

#undef STATE3_DESIGNATED_CNS11643_2
#undef STATE3_NONE

#undef STATE2_DESIGNATED_ISO_IR_165
#undef STATE2_DESIGNATED_CNS11643_1
#undef STATE2_DESIGNATED_GB2312
#undef STATE2_NONE

#undef STATE_TWOBYTE
#undef STATE_ASCII