2 * Copyright (C) 1999-2001, 2008, 2016 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either version 2.1
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <https://www.gnu.org/licenses/>.
24 /* Specification: RFC 1554 */
25 /* ESC '(' 'I' for JISX0201 Katakana is an extension not found in RFC 1554 or
26 CJK.INF, but implemented in glibc-2.1 and qt-2.0. */
31 * The state is composed of one of the following values
34 #define STATE_JISX0201ROMAN 1
35 #define STATE_JISX0201KATAKANA 2
36 #define STATE_JISX0208 3
37 #define STATE_JISX0212 4
38 #define STATE_GB2312 5
39 #define STATE_KSC5601 6
41 * and one of the following values, << 8
43 #define STATE_G2_NONE 0
44 #define STATE_G2_ISO8859_1 1
45 #define STATE_G2_ISO8859_7 2
48 unsigned int state1 = state & 0xff, state2 = state >> 8
49 #define COMBINE_STATE \
50 state = (state2 << 8) | state1
53 iso2022_jp2_mbtowc (conv_t conv
, ucs4_t
*pwc
, const unsigned char *s
, size_t n
)
55 state_t state
= conv
->istate
;
73 state1
= STATE_JISX0201ROMAN
;
80 state1
= STATE_JISX0201KATAKANA
;
89 if (s
[2] == '@' || s
[2] == 'B') {
90 /* We don't distinguish JIS X 0208-1978 and JIS X 0208-1983. */
91 state1
= STATE_JISX0208
;
98 state1
= STATE_GB2312
;
108 state1
= STATE_JISX0212
;
115 state1
= STATE_KSC5601
;
129 state2
= STATE_G2_ISO8859_1
;
136 state2
= STATE_G2_ISO8859_7
;
148 case STATE_G2_ISO8859_1
:
150 unsigned char buf
= s
[2]+0x80;
151 int ret
= iso8859_1_mbtowc(conv
,pwc
,&buf
,1);
152 if (ret
== RET_ILSEQ
)
154 if (ret
!= 1) abort();
156 conv
->istate
= state
;
160 case STATE_G2_ISO8859_7
:
162 unsigned char buf
= s
[2]+0x80;
163 int ret
= iso8859_7_mbtowc(conv
,pwc
,&buf
,1);
164 if (ret
== RET_ILSEQ
)
166 if (ret
!= 1) abort();
168 conv
->istate
= state
;
182 int ret
= ascii_mbtowc(conv
,pwc
,s
,1);
183 if (ret
== RET_ILSEQ
)
185 if (ret
!= 1) abort();
186 if (*pwc
== 0x000a || *pwc
== 0x000d)
187 state2
= STATE_G2_NONE
;
189 conv
->istate
= state
;
193 case STATE_JISX0201ROMAN
:
195 int ret
= jisx0201_mbtowc(conv
,pwc
,s
,1);
196 if (ret
== RET_ILSEQ
)
198 if (ret
!= 1) abort();
199 if (*pwc
== 0x000a || *pwc
== 0x000d)
200 state2
= STATE_G2_NONE
;
202 conv
->istate
= state
;
206 case STATE_JISX0201KATAKANA
:
208 unsigned char buf
= c
+0x80;
209 int ret
= jisx0201_mbtowc(conv
,pwc
,&buf
,1);
210 if (ret
== RET_ILSEQ
)
212 if (ret
!= 1) abort();
214 conv
->istate
= state
;
221 if (s
[0] < 0x80 && s
[1] < 0x80) {
222 int ret
= jisx0208_mbtowc(conv
,pwc
,s
,2);
223 if (ret
== RET_ILSEQ
)
225 if (ret
!= 2) abort();
227 conv
->istate
= state
;
234 if (s
[0] < 0x80 && s
[1] < 0x80) {
235 int ret
= jisx0212_mbtowc(conv
,pwc
,s
,2);
236 if (ret
== RET_ILSEQ
)
238 if (ret
!= 2) abort();
240 conv
->istate
= state
;
247 if (s
[0] < 0x80 && s
[1] < 0x80) {
248 int ret
= gb2312_mbtowc(conv
,pwc
,s
,2);
249 if (ret
== RET_ILSEQ
)
251 if (ret
!= 2) abort();
253 conv
->istate
= state
;
260 if (s
[0] < 0x80 && s
[1] < 0x80) {
261 int ret
= ksc5601_mbtowc(conv
,pwc
,s
,2);
262 if (ret
== RET_ILSEQ
)
264 if (ret
!= 2) abort();
266 conv
->istate
= state
;
275 conv
->istate
= state
;
276 return RET_TOOFEW(count
);
280 conv
->istate
= state
;
281 return RET_SHIFT_ILSEQ(count
);
288 * The state can also contain one of the following values, << 16.
289 * Values >= STATE_TAG_LANGUAGE are temporary tag parsing states.
291 #define STATE_TAG_NONE 0
292 #define STATE_TAG_LANGUAGE 4
293 #define STATE_TAG_LANGUAGE_j 5
294 #define STATE_TAG_LANGUAGE_ja 1
295 #define STATE_TAG_LANGUAGE_k 6
296 #define STATE_TAG_LANGUAGE_ko 2
297 #define STATE_TAG_LANGUAGE_z 7
298 #define STATE_TAG_LANGUAGE_zh 3
300 #define SPLIT_STATE \
301 unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = state >> 16
302 #define COMBINE_STATE \
303 state = (state3 << 16) | (state2 << 8) | state1
306 iso2022_jp2_wctomb (conv_t conv
, unsigned char *r
, ucs4_t wc
, size_t n
)
308 state_t state
= conv
->ostate
;
310 unsigned char buf
[2];
312 /* This defines the conversion preferences depending on the current
314 enum conversion
{ none
= 0, european
, japanese
, chinese
, korean
, other
};
315 static const unsigned int conversion_lists
[STATE_TAG_LANGUAGE
] = {
317 japanese
+ (european
<< 3) + (chinese
<< 6) + (korean
<< 9) + (other
<< 12),
318 /* STATE_TAG_LANGUAGE_ja */
319 japanese
+ (european
<< 3) + (chinese
<< 6) + (korean
<< 9) + (other
<< 12),
320 /* STATE_TAG_LANGUAGE_ko */
321 korean
+ (european
<< 3) + (japanese
<< 6) + (chinese
<< 9) + (other
<< 12),
322 /* STATE_TAG_LANGUAGE_zh */
323 chinese
+ (european
<< 3) + (japanese
<< 6) + (korean
<< 9) + (other
<< 12)
325 unsigned int conversion_list
;
327 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
328 if ((wc
>> 7) == (0xe0000 >> 7)) {
330 if (c
>= 'A' && c
<= 'Z')
334 state3
= STATE_TAG_LANGUAGE
;
336 conv
->ostate
= state
;
339 if (state3
== STATE_TAG_LANGUAGE
) {
340 state3
= STATE_TAG_LANGUAGE_j
;
342 conv
->ostate
= state
;
347 if (state3
== STATE_TAG_LANGUAGE_j
) {
348 state3
= STATE_TAG_LANGUAGE_ja
;
350 conv
->ostate
= state
;
355 if (state3
== STATE_TAG_LANGUAGE
) {
356 state3
= STATE_TAG_LANGUAGE_k
;
358 conv
->ostate
= state
;
363 if (state3
== STATE_TAG_LANGUAGE_k
) {
364 state3
= STATE_TAG_LANGUAGE_ko
;
366 conv
->ostate
= state
;
371 if (state3
== STATE_TAG_LANGUAGE
) {
372 state3
= STATE_TAG_LANGUAGE_z
;
374 conv
->ostate
= state
;
379 if (state3
== STATE_TAG_LANGUAGE_z
) {
380 state3
= STATE_TAG_LANGUAGE_zh
;
382 conv
->ostate
= state
;
387 state3
= STATE_TAG_NONE
;
389 conv
->ostate
= state
;
394 /* Other tag characters reset the tag parsing state or are ignored. */
395 if (state3
>= STATE_TAG_LANGUAGE
)
396 state3
= STATE_TAG_NONE
;
398 conv
->ostate
= state
;
401 if (state3
>= STATE_TAG_LANGUAGE
)
402 state3
= STATE_TAG_NONE
;
405 ret
= ascii_wctomb(conv
,buf
,wc
,1);
406 if (ret
!= RET_ILUNI
) {
407 if (ret
!= 1) abort();
409 int count
= (state1
== STATE_ASCII
? 1 : 4);
412 if (state1
!= STATE_ASCII
) {
417 state1
= STATE_ASCII
;
420 if (wc
== 0x000a || wc
== 0x000d)
421 state2
= STATE_G2_NONE
;
423 conv
->ostate
= state
;
428 conversion_list
= conversion_lists
[state3
];
431 switch (conversion_list
& ((1 << 3) - 1)) {
435 /* Try ISO-8859-1. */
436 ret
= iso8859_1_wctomb(conv
,buf
,wc
,1);
437 if (ret
!= RET_ILUNI
) {
438 if (ret
!= 1) abort();
439 if (buf
[0] >= 0x80) {
440 int count
= (state2
== STATE_G2_ISO8859_1
? 3 : 6);
443 if (state2
!= STATE_G2_ISO8859_1
) {
448 state2
= STATE_G2_ISO8859_1
;
454 conv
->ostate
= state
;
459 /* Try ISO-8859-7. */
460 ret
= iso8859_7_wctomb(conv
,buf
,wc
,1);
461 if (ret
!= RET_ILUNI
) {
462 if (ret
!= 1) abort();
463 if (buf
[0] >= 0x80) {
464 int count
= (state2
== STATE_G2_ISO8859_7
? 3 : 6);
467 if (state2
!= STATE_G2_ISO8859_7
) {
472 state2
= STATE_G2_ISO8859_7
;
478 conv
->ostate
= state
;
487 /* Try JIS X 0201-1976 Roman. */
488 ret
= jisx0201_wctomb(conv
,buf
,wc
,1);
489 if (ret
!= RET_ILUNI
) {
490 if (ret
!= 1) abort();
492 int count
= (state1
== STATE_JISX0201ROMAN
? 1 : 4);
495 if (state1
!= STATE_JISX0201ROMAN
) {
500 state1
= STATE_JISX0201ROMAN
;
503 if (wc
== 0x000a || wc
== 0x000d)
504 state2
= STATE_G2_NONE
;
506 conv
->ostate
= state
;
511 /* Try JIS X 0208-1990 in place of JIS X 0208-1978 and
513 ret
= jisx0208_wctomb(conv
,buf
,wc
,2);
514 if (ret
!= RET_ILUNI
) {
515 if (ret
!= 2) abort();
516 if (buf
[0] < 0x80 && buf
[1] < 0x80) {
517 int count
= (state1
== STATE_JISX0208
? 2 : 5);
520 if (state1
!= STATE_JISX0208
) {
525 state1
= STATE_JISX0208
;
530 conv
->ostate
= state
;
535 /* Try JIS X 0212-1990. */
536 ret
= jisx0212_wctomb(conv
,buf
,wc
,2);
537 if (ret
!= RET_ILUNI
) {
538 if (ret
!= 2) abort();
539 if (buf
[0] < 0x80 && buf
[1] < 0x80) {
540 int count
= (state1
== STATE_JISX0212
? 2 : 6);
543 if (state1
!= STATE_JISX0212
) {
549 state1
= STATE_JISX0212
;
554 conv
->ostate
= state
;
563 /* Try GB 2312-1980. */
564 ret
= gb2312_wctomb(conv
,buf
,wc
,2);
565 if (ret
!= RET_ILUNI
) {
566 if (ret
!= 2) abort();
567 if (buf
[0] < 0x80 && buf
[1] < 0x80) {
568 int count
= (state1
== STATE_GB2312
? 2 : 5);
571 if (state1
!= STATE_GB2312
) {
576 state1
= STATE_GB2312
;
581 conv
->ostate
= state
;
590 /* Try KS C 5601-1992. */
591 ret
= ksc5601_wctomb(conv
,buf
,wc
,2);
592 if (ret
!= RET_ILUNI
) {
593 if (ret
!= 2) abort();
594 if (buf
[0] < 0x80 && buf
[1] < 0x80) {
595 int count
= (state1
== STATE_KSC5601
? 2 : 6);
598 if (state1
!= STATE_KSC5601
) {
604 state1
= STATE_KSC5601
;
609 conv
->ostate
= state
;
618 /* Try JIS X 0201-1976 Kana. This is not officially part of
619 ISO-2022-JP-2, according to RFC 1554. Therefore we try this
620 only after all other attempts. */
621 ret
= jisx0201_wctomb(conv
,buf
,wc
,1);
622 if (ret
!= RET_ILUNI
) {
623 if (ret
!= 1) abort();
624 if (buf
[0] >= 0x80) {
625 int count
= (state1
== STATE_JISX0201KATAKANA
? 1 : 4);
628 if (state1
!= STATE_JISX0201KATAKANA
) {
633 state1
= STATE_JISX0201KATAKANA
;
637 conv
->ostate
= state
;
648 conversion_list
= conversion_list
>> 3;
649 } while (conversion_list
!= 0);
655 iso2022_jp2_reset (conv_t conv
, unsigned char *r
, size_t n
)
657 state_t state
= conv
->ostate
;
661 if (state1
!= STATE_ASCII
) {
667 /* conv->ostate = 0; will be done by the caller */
675 #undef STATE_TAG_LANGUAGE_zh
676 #undef STATE_TAG_LANGUAGE_z
677 #undef STATE_TAG_LANGUAGE_ko
678 #undef STATE_TAG_LANGUAGE_k
679 #undef STATE_TAG_LANGUAGE_ja
680 #undef STATE_TAG_LANGUAGE_j
681 #undef STATE_TAG_LANGUAGE
682 #undef STATE_TAG_NONE
683 #undef STATE_G2_ISO8859_7
684 #undef STATE_G2_ISO8859_1
688 #undef STATE_JISX0212
689 #undef STATE_JISX0208
690 #undef STATE_JISX0201KATAKANA
691 #undef STATE_JISX0201ROMAN