1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "nsJapaneseToUnicode.h"
7 #include "nsUCSupport.h"
9 #include "japanese.map"
11 #include "mozilla/Assertions.h"
12 #include "mozilla/dom/EncodingUtils.h"
14 using mozilla::dom::EncodingUtils
;
// Aliases for the lookup tables and sentinel values used by the decoders in
// this file. gCP932Index and gJIS0212Index are defined outside this view.
16 // HTML5 says to use Windows-31J instead of the real Shift_JIS for decoding
// SJIS_INDEX: entry 0 of gCP932Index. nsShiftJISToUnicode::Convert indexes it
// with the low seven bits of a source byte.
17 #define SJIS_INDEX gCP932Index[0]
// JIS0208_INDEX: entry 1 of gCP932Index. The EUC-JP and ISO-2022-JP decoders
// index it with the low seven bits of a source byte.
18 #define JIS0208_INDEX gCP932Index[1]
// JIS0212_INDEX: alias for gJIS0212Index, indexed the same way.
20 #define JIS0212_INDEX gJIS0212Index
// Value returned by nsShiftJISToUnicode::GetCharacterForUnMapped().
21 #define SJIS_UNMAPPED 0x30fb
// U+FFFD; the decoders below write it to the output buffer.
22 #define UNICODE_REPLACEMENT_CHARACTER 0xfffd
// True when b, converted to uint8_t, lies in 0xa1..0xfe inclusive.
23 #define IN_GR_RANGE(b) \
24 ((uint8_t(0xa1) <= uint8_t(b)) && (uint8_t(b) <= uint8_t(0xfe)))
// Shift_JIS decoder entry point: reads bytes from aSrc and writes char16_t
// code units to aDest.
//
// aSrc, aSrcLen:   input bytes and a pointer to their count. On the exit paths
//                  that assign it, *aSrcLen receives the number of bytes
//                  consumed (src - aSrc).
// aDest, aDestLen: output buffer and a pointer to its capacity in char16_t
//                  units. *aDestLen receives the number of units written
//                  (dest - aDest).
// Status codes visible in this listing: NS_ERROR_ILLEGAL_INPUT and
// NS_OK_UDEC_MOREOUTPUT.
//
// NOTE(review): this listing does not show every line of the function (braces,
// labels and some statements are absent), so two adjacent lines here are not
// necessarily adjacent in control flow. Confirm against the complete source
// before relying on the flow between statements.
26 NS_IMETHODIMP
nsShiftJISToUnicode::Convert(
27 const char * aSrc
, int32_t * aSrcLen
,
28 char16_t
* aDest
, int32_t * aDestLen
)
// sbIdx is indexed by a byte value (see "off = sbIdx[*src]" below). Bytes
// 0x40..0x7E yield 0..62 and bytes 0x80..0xFC yield 63..187; every other byte
// (including 0x7F and 0xFD..0xFF) yields 0xFF.
30 static const uint8_t sbIdx
[256] =
32 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x00 */
33 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x08 */
34 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x10 */
35 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x18 */
36 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x20 */
37 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x28 */
38 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x30 */
39 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38 */
40 0, 1, 2, 3, 4, 5, 6, 7, /* 0x40 */
41 8, 9, 10, 11, 12, 13, 14, 15, /* 0x48 */
42 16, 17, 18, 19, 20, 21, 22, 23, /* 0x50 */
43 24, 25, 26, 27, 28, 29, 30, 31, /* 0x58 */
44 32, 33, 34, 35, 36, 37, 38, 39, /* 0x60 */
45 40, 41, 42, 43, 44, 45, 46, 47, /* 0x68 */
46 48, 49, 50, 51, 52, 53, 54, 55, /* 0x70 */
47 56, 57, 58, 59, 60, 61, 62, 0xFF, /* 0x78 */
48 63, 64, 65, 66, 67, 68, 69, 70, /* 0x80 */
49 71, 72, 73, 74, 75, 76, 77, 78, /* 0x88 */
50 79, 80, 81, 82, 83, 84, 85, 86, /* 0x90 */
51 87, 88, 89, 90, 91, 92, 93, 94, /* 0x98 */
52 95, 96, 97, 98, 99, 100, 101, 102, /* 0xa0 */
53 103, 104, 105, 106, 107, 108, 109, 110, /* 0xa8 */
54 111, 112, 113, 114, 115, 116, 117, 118, /* 0xb0 */
55 119, 120, 121, 122, 123, 124, 125, 126, /* 0xb8 */
56 127, 128, 129, 130, 131, 132, 133, 134, /* 0xc0 */
57 135, 136, 137, 138, 139, 140, 141, 142, /* 0xc8 */
58 143, 144, 145, 146, 147, 148, 149, 150, /* 0xd0 */
59 151, 152, 153, 154, 155, 156, 157, 158, /* 0xd8 */
60 159, 160, 161, 162, 163, 164, 165, 166, /* 0xe0 */
61 167, 168, 169, 170, 171, 172, 173, 174, /* 0xe8 */
62 175, 176, 177, 178, 179, 180, 181, 182, /* 0xf0 */
63 183, 184, 185, 186, 187, 0xFF, 0xFF, 0xFF, /* 0xf8 */
// Cursors: src/dest advance through the buffers; srcEnd/destEnd are the
// one-past-the-end bounds derived from the caller-supplied lengths.
66 const unsigned char* srcEnd
= (unsigned char*)aSrc
+ *aSrcLen
;
67 const unsigned char* src
=(unsigned char*) aSrc
;
68 char16_t
* destEnd
= aDest
+ *aDestLen
;
69 char16_t
* dest
= aDest
;
// Main loop: runs while unread input remains.
70 while (src
< srcEnd
) {
// The source byte is written to the output unchanged, widened to char16_t.
75 *dest
++ = (char16_t
) *src
;
// Output-buffer-full test.
76 if (dest
>= destEnd
) {
// Fetch the SJIS_INDEX entry selected by the low seven bits of the byte and
// keep it in mData.
80 mData
= SJIS_INDEX
[*src
& 0x7F];
82 mState
= 1; // two bytes
83 } else if (mData
< 0xF000) {
86 *dest
++ = mData
; // JIS 0201
87 if (dest
>= destEnd
) {
94 case 1: // Index to table
96 MOZ_ASSERT(mData
< 0xE000);
// off is the sbIdx entry for the current byte.
97 uint8_t off
= sbIdx
[*src
];
99 // Error handling: in the case where the second octet is not in the
100 // valid ranges 0x40-0x7E 0x80-0xFC, unconsume the invalid octet and
101 // interpret it as the ASCII value. In the case where the second
102 // octet is in the valid range but there is no mapping for the
103 // 2-octet sequence, do not unconsume.
106 if (mErrBehavior
== kOnError_Signal
)
107 goto error_invalidchar
;
108 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
// Table lookup: gJapaneseMap is indexed by mData plus the second-byte offset.
110 char16_t ch
= gJapaneseMap
[mData
+off
];
112 if (mErrBehavior
== kOnError_Signal
)
113 goto error_invalidchar
;
// Here mData is asserted to lie in 0xE000..0xEFFF.
126 MOZ_ASSERT(0xE000 <= mData
&& mData
< 0xF000);
127 uint8_t off
= sbIdx
[*src
];
129 // Error handling as in case 1
132 if (mErrBehavior
== kOnError_Signal
)
133 goto error_invalidchar
;
135 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
// The output code unit is computed directly as mData + off (no map lookup).
137 *dest
++ = mData
+ off
;
// Exit bookkeeping: *aDestLen receives the count of char16_t units written and
// *aSrcLen the count of source bytes consumed.
148 *aDestLen
= dest
- aDest
;
151 *aDestLen
= dest
- aDest
;
152 *aSrcLen
= src
- (const unsigned char*)aSrc
;
153 return NS_ERROR_ILLEGAL_INPUT
;
155 *aDestLen
= dest
- aDest
;
// Tests for mState == 0 with all input consumed.
157 if ((mState
== 0) && (src
== srcEnd
)) {
160 *aSrcLen
= src
- (const unsigned char*)aSrc
;
161 return NS_OK_UDEC_MOREOUTPUT
;
// Returns SJIS_UNMAPPED (0x30fb) converted to char16_t.
// NOTE(review): the return-type line and braces are not visible in this
// listing; judging by the name this is the substitute character for unmapped
// input. Confirm against the caller.
165 nsShiftJISToUnicode::GetCharacterForUnMapped()
167 return char16_t(SJIS_UNMAPPED
);
// EUC-JP decoder entry point: reads bytes from aSrc and writes char16_t code
// units to aDest.
//
// aSrc, aSrcLen:   input bytes and a pointer to their count. On the exit paths
//                  that assign it, *aSrcLen receives the number of bytes
//                  consumed (src - aSrc).
// aDest, aDestLen: output buffer and a pointer to its capacity in char16_t
//                  units. *aDestLen receives the number of units written
//                  (dest - aDest).
// Status codes visible in this listing: NS_ERROR_ILLEGAL_INPUT and
// NS_OK_UDEC_MOREOUTPUT.
//
// NOTE(review): this listing does not show every line of the function (braces,
// labels and some statements are absent), so two adjacent lines here are not
// necessarily adjacent in control flow. Confirm against the complete source
// before relying on the flow between statements.
170 NS_IMETHODIMP
nsEUCJPToUnicodeV2::Convert(
171 const char * aSrc
, int32_t * aSrcLen
,
172 char16_t
* aDest
, int32_t * aDestLen
)
// sbIdx is indexed by a byte value. Bytes 0xA1..0xFE yield 0..93; every other
// byte (0x00..0xA0 and 0xFF) yields 0xFF.
174 static const uint8_t sbIdx
[256] =
177 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
178 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
180 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
181 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
183 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
184 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
186 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
187 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
189 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
190 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
192 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
193 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
195 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
196 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
198 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
199 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
201 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
202 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
204 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
205 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
207 0xFF, 0, 1, 2, 3, 4, 5, 6,
208 7, 8 , 9, 10, 11, 12, 13, 14,
210 15, 16, 17, 18, 19, 20, 21, 22,
211 23, 24, 25, 26, 27, 28, 29, 30,
213 31, 32, 33, 34, 35, 36, 37, 38,
214 39, 40, 41, 42, 43, 44, 45, 46,
216 47, 48, 49, 50, 51, 52, 53, 54,
217 55, 56, 57, 58, 59, 60, 61, 62,
219 63, 64, 65, 66, 67, 68, 69, 70,
220 71, 72, 73, 74, 75, 76, 77, 78,
222 79, 80, 81, 82, 83, 84, 85, 86,
223 87, 88, 89, 90, 91, 92, 93, 0xFF,
// Cursors: src/dest advance through the buffers; srcEnd/destEnd are the
// one-past-the-end bounds derived from the caller-supplied lengths.
226 const unsigned char* srcEnd
= (unsigned char*)aSrc
+ *aSrcLen
;
227 const unsigned char* src
=(unsigned char*) aSrc
;
228 char16_t
* destEnd
= aDest
+ *aDestLen
;
229 char16_t
* dest
= aDest
;
// Main loop: runs while unread input remains.
230 while((src
< srcEnd
))
// Matches bytes with the high bit set, except 0xa0.
235 if(*src
& 0x80 && *src
!= (unsigned char)0xa0)
// Fetch the JIS0208_INDEX entry selected by the low seven bits of the byte.
237 mData
= JIS0208_INDEX
[*src
& 0x7F];
240 mState
= 1; // two byte JIS0208
244 mState
= 2; // JIS0201
245 } else if(0x8f == *src
) {
247 mState
= 3; // JIS0212
250 if (mErrBehavior
== kOnError_Signal
)
251 goto error_invalidchar
;
// The source byte is written to the output unchanged, widened to char16_t.
259 *dest
++ = (char16_t
) *src
;
265 case 1: // Index to table
// off is the sbIdx entry for the current byte.
267 uint8_t off
= sbIdx
[*src
];
269 if (mErrBehavior
== kOnError_Signal
)
270 goto error_invalidchar
;
272 // if the first byte is valid for EUC-JP but the second
273 // is not while being a valid US-ASCII, save it
274 // instead of eating it up !
275 if ( (uint8_t)*src
< (uint8_t)0x7f )
// Table lookup: gJapaneseMap is indexed by mData plus the second-byte offset.
278 *dest
++ = gJapaneseMap
[mData
+off
];
// Bytes 0xA1..0xDF are shifted by a constant so that 0xA1 becomes U+FF61 and
// 0xDF becomes U+FF9F.
288 if((0xA1 <= *src
) && (*src
<= 0xDF)) {
289 *dest
++ = (0xFF61-0x00A1) + *src
;
291 if (mErrBehavior
== kOnError_Signal
)
292 goto error_invalidchar
;
294 // if 0x8e is not followed by a valid JIS X 0201 byte
295 // but by a valid US-ASCII, save it instead of eating it up.
296 if ( (uint8_t)*src
< (uint8_t)0x7f )
307 if (IN_GR_RANGE(*src
))
// Fetch the JIS0212_INDEX entry selected by the low seven bits of the byte.
309 mData
= JIS0212_INDEX
[*src
& 0x7F];
317 // First "JIS 0212" byte is not in the valid GR range: save it
318 if (mErrBehavior
== kOnError_Signal
)
319 goto error_invalidchar
;
330 uint8_t off
= sbIdx
[*src
];
332 *dest
++ = gJapaneseMap
[mData
+off
];
338 // else fall through to error handler
340 case 5: // two bytes undefined
342 if (mErrBehavior
== kOnError_Signal
)
343 goto error_invalidchar
;
345 // Undefined JIS 0212 two byte sequence. If the second byte is in
346 // the valid range for a two byte sequence (0xa1 - 0xfe) consume
347 // both bytes. Otherwise resynchronize on the second byte.
348 if (!IN_GR_RANGE(*src
))
// Exit bookkeeping: *aDestLen receives the count of char16_t units written and
// *aSrcLen the count of source bytes consumed.
358 *aDestLen
= dest
- aDest
;
361 *aDestLen
= dest
- aDest
;
362 *aSrcLen
= src
- (const unsigned char*)aSrc
;
363 return NS_ERROR_ILLEGAL_INPUT
;
365 *aDestLen
= dest
- aDest
;
// Tests for mState == 0 with all input consumed.
367 if ((mState
== 0) && (src
== srcEnd
)) {
370 *aSrcLen
= src
- (const unsigned char*)aSrc
;
371 return NS_OK_UDEC_MOREOUTPUT
;
// ISO-2022-JP decoder entry point: reads bytes from aSrc and writes char16_t
// code units to aDest, switching character sets on escape sequences
// (states mState_ESC_28, mState_ESC_24, mState_ESC_24_28, mState_ESC_2e and
// mState_ESC_4e below).
//
// aSrc, aSrcLen:   input bytes and a pointer to their count. On the exit paths
//                  that assign it, *aSrcLen receives the number of bytes
//                  consumed (src - aSrc).
// aDest, aDestLen: output buffer and a pointer to its capacity in char16_t
//                  units. *aDestLen receives the number of units written
//                  (dest - aDest).
// Status codes visible in this listing: NS_OK_UDEC_MOREOUTPUT,
// NS_ERROR_UNEXPECTED and NS_ERROR_ILLEGAL_INPUT.
//
// mLastLegalState is assigned from mState in the character-set states and is
// copied back into mState on several escape-sequence and error paths.
// CHECK_OVERRUN is defined outside this view; presumably it tests whether the
// given number of code units still fits before destEnd. TODO confirm.
//
// NOTE(review): this listing does not show every line of the function (braces,
// labels and some statements are absent), so two adjacent lines here are not
// necessarily adjacent in control flow. Confirm against the complete source
// before relying on the flow between statements.
376 NS_IMETHODIMP
nsISO2022JPToUnicodeV2::Convert(
377 const char * aSrc
, int32_t * aSrcLen
,
378 char16_t
* aDest
, int32_t * aDestLen
)
// fbIdx is indexed by the low seven bits of a first byte (GB2312 and KSC5601
// states below). Byte b in 0x21..0x7E yields 94 * (b - 0x21); every other
// index yields 0xFFFD.
380 static const uint16_t fbIdx
[128] =
383 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
384 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
386 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
387 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
389 0xFFFD, 0, 94, 94* 2, 94* 3, 94* 4, 94* 5, 94* 6,
390 94* 7, 94* 8 , 94* 9, 94*10, 94*11, 94*12, 94*13, 94*14,
392 94*15, 94*16, 94*17, 94*18, 94*19, 94*20, 94*21, 94*22,
393 94*23, 94*24, 94*25, 94*26, 94*27, 94*28, 94*29, 94*30,
395 94*31, 94*32, 94*33, 94*34, 94*35, 94*36, 94*37, 94*38,
396 94*39, 94*40, 94*41, 94*42, 94*43, 94*44, 94*45, 94*46,
398 94*47, 94*48, 94*49, 94*50, 94*51, 94*52, 94*53, 94*54,
399 94*55, 94*56, 94*57, 94*58, 94*59, 94*60, 94*61, 94*62,
401 94*63, 94*64, 94*65, 94*66, 94*67, 94*68, 94*69, 94*70,
402 94*71, 94*72, 94*73, 94*74, 94*75, 94*76, 94*77, 94*78,
404 94*79, 94*80, 94*81, 94*82, 94*83, 94*84, 94*85, 94*86,
405 94*87, 94*88, 94*89, 94*90, 94*91, 94*92, 94*93, 0xFFFD,
// sbIdx is indexed by a byte value. Bytes 0x21..0x7E yield 0..93; every other
// byte yields 0xFF.
407 static const uint8_t sbIdx
[256] =
410 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
411 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
413 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
414 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
416 0xFF, 0, 1, 2, 3, 4, 5, 6,
417 7, 8 , 9, 10, 11, 12, 13, 14,
419 15, 16, 17, 18, 19, 20, 21, 22,
420 23, 24, 25, 26, 27, 28, 29, 30,
422 31, 32, 33, 34, 35, 36, 37, 38,
423 39, 40, 41, 42, 43, 44, 45, 46,
425 47, 48, 49, 50, 51, 52, 53, 54,
426 55, 56, 57, 58, 59, 60, 61, 62,
428 63, 64, 65, 66, 67, 68, 69, 70,
429 71, 72, 73, 74, 75, 76, 77, 78,
431 79, 80, 81, 82, 83, 84, 85, 86,
432 87, 88, 89, 90, 91, 92, 93, 0xFF,
434 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
435 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
437 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
438 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
440 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
441 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
443 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
444 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
446 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
447 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
449 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
450 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
452 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
453 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
455 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
456 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
// Cursors: src/dest advance through the buffers; srcEnd/destEnd are the
// one-past-the-end bounds derived from the caller-supplied lengths.
459 const unsigned char* srcEnd
= (unsigned char*)aSrc
+ *aSrcLen
;
460 const unsigned char* src
=(unsigned char*) aSrc
;
461 char16_t
* destEnd
= aDest
+ *aDestLen
;
462 char16_t
* dest
= aDest
;
// Main loop: runs while unread input remains.
463 while((src
< srcEnd
))
471 mLastLegalState
= mState
;
// Byte with the high bit set.
473 } else if(*src
& 0x80) {
474 if (mErrBehavior
== kOnError_Signal
)
476 if (CHECK_OVERRUN(dest
, destEnd
, 1))
478 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
480 if (CHECK_OVERRUN(dest
, destEnd
, 1))
// The source byte is written to the output unchanged, widened to char16_t.
482 *dest
++ = (char16_t
) *src
;
// Dispatch on the byte that selects the next escape-sequence state.
488 mState
= mState_ESC_28
;
489 } else if ('$' == *src
) {
490 mState
= mState_ESC_24
;
491 } else if ('.' == *src
) { // for ISO-2022-JP-2
492 mState
= mState_ESC_2e
;
493 } else if ('N' == *src
) { // for ISO-2022-JP-2
494 mState
= mState_ESC_4e
;
496 if (CHECK_OVERRUN(dest
, destEnd
, 2))
// 0x1b is written to the output as a literal code unit.
498 *dest
++ = (char16_t
) 0x1b;
500 if (mErrBehavior
== kOnError_Signal
)
502 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
504 *dest
++ = (char16_t
) *src
;
506 mState
= mLastLegalState
;
510 case mState_ESC_28
: // ESC (
512 mState
= mState_ASCII
;
513 if (mRunLength
== 0) {
514 if (CHECK_OVERRUN(dest
, destEnd
, 1))
519 } else if ('J' == *src
) {
520 mState
= mState_JISX0201_1976Roman
;
521 if (mRunLength
== 0 && mLastLegalState
!= mState_ASCII
) {
522 if (CHECK_OVERRUN(dest
, destEnd
, 1))
524 if (mErrBehavior
== kOnError_Signal
)
529 } else if ('I' == *src
) {
530 mState
= mState_JISX0201_1976Kana
;
533 if (CHECK_OVERRUN(dest
, destEnd
, 3))
// The bytes 0x1b and '(' are written to the output as literal code units.
535 *dest
++ = (char16_t
) 0x1b;
536 *dest
++ = (char16_t
) '(';
538 if (mErrBehavior
== kOnError_Signal
)
540 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
542 *dest
++ = (char16_t
) *src
;
544 mState
= mLastLegalState
;
548 case mState_ESC_24
: // ESC $
550 mState
= mState_JISX0208_1978
;
552 } else if ('A' == *src
) {
553 mState
= mState_GB2312_1980
;
555 } else if ('B' == *src
) {
556 mState
= mState_JISX0208_1983
;
558 } else if ('(' == *src
) {
559 mState
= mState_ESC_24_28
;
561 if (CHECK_OVERRUN(dest
, destEnd
, 3))
// The bytes 0x1b and '$' are written to the output as literal code units.
563 *dest
++ = (char16_t
) 0x1b;
564 *dest
++ = (char16_t
) '$';
566 if (mErrBehavior
== kOnError_Signal
)
568 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
570 *dest
++ = (char16_t
) *src
;
572 mState
= mLastLegalState
;
576 case mState_ESC_24_28
: // ESC $ (
578 mState
= mState_KSC5601_1987
;
580 } else if ('D' == *src
) {
581 mState
= mState_JISX0212_1990
;
584 if (CHECK_OVERRUN(dest
, destEnd
, 4))
// The bytes 0x1b, '$' and '(' are written to the output as literal code units.
586 *dest
++ = (char16_t
) 0x1b;
587 *dest
++ = (char16_t
) '$';
588 *dest
++ = (char16_t
) '(';
590 if (mErrBehavior
== kOnError_Signal
)
592 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
594 *dest
++ = (char16_t
) *src
;
596 mState
= mLastLegalState
;
600 case mState_JISX0201_1976Roman
:
602 mLastLegalState
= mState
;
604 } else if(*src
& 0x80) {
605 if (mErrBehavior
== kOnError_Signal
)
607 if (CHECK_OVERRUN(dest
, destEnd
, 1))
609 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
612 // XXX We need to decide how to handle \ and ~ here
613 // we may need an if statement here for '\' and '~'
614 // to map them to Yen and Overbar
615 if (CHECK_OVERRUN(dest
, destEnd
, 1))
617 *dest
++ = (char16_t
) *src
;
622 case mState_JISX0201_1976Kana
:
624 mLastLegalState
= mState
;
627 if (CHECK_OVERRUN(dest
, destEnd
, 1))
// Bytes 0x21..0x5F are shifted by a constant so that 0x21 becomes U+FF61 and
// 0x5F becomes U+FF9F.
629 if((0x21 <= *src
) && (*src
<= 0x5F)) {
630 *dest
++ = (0xFF61-0x0021) + *src
;
632 if (mErrBehavior
== kOnError_Signal
)
634 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
// First-byte states: each of the five states below remembers itself in
// mLastLegalState, looks up a first-byte entry into mData, and compares the
// entry against 0xFFFD.
640 case mState_JISX0208_1978
:
642 mLastLegalState
= mState
;
644 } else if(*src
& 0x80) {
645 mLastLegalState
= mState
;
646 mState
= mState_ERROR
;
648 mData
= JIS0208_INDEX
[*src
& 0x7F];
649 if (0xFFFD == mData
) {
650 if (mErrBehavior
== kOnError_Signal
)
652 mState
= mState_ERROR
;
654 mState
= mState_JISX0208_1978_2ndbyte
;
659 case mState_GB2312_1980
:
661 mLastLegalState
= mState
;
663 } else if(*src
& 0x80) {
664 mLastLegalState
= mState
;
665 mState
= mState_ERROR
;
667 mData
= fbIdx
[*src
& 0x7F];
668 if (0xFFFD == mData
) {
669 if (mErrBehavior
== kOnError_Signal
)
671 mState
= mState_ERROR
;
673 mState
= mState_GB2312_1980_2ndbyte
;
678 case mState_JISX0208_1983
:
680 mLastLegalState
= mState
;
682 } else if(*src
& 0x80) {
683 mLastLegalState
= mState
;
684 mState
= mState_ERROR
;
686 mData
= JIS0208_INDEX
[*src
& 0x7F];
687 if (0xFFFD == mData
) {
688 if (mErrBehavior
== kOnError_Signal
)
690 mState
= mState_ERROR
;
692 mState
= mState_JISX0208_1983_2ndbyte
;
697 case mState_KSC5601_1987
:
699 mLastLegalState
= mState
;
701 } else if(*src
& 0x80) {
702 mLastLegalState
= mState
;
703 mState
= mState_ERROR
;
705 mData
= fbIdx
[*src
& 0x7F];
706 if (0xFFFD == mData
) {
707 if (mErrBehavior
== kOnError_Signal
)
709 mState
= mState_ERROR
;
711 mState
= mState_KSC5601_1987_2ndbyte
;
716 case mState_JISX0212_1990
:
718 mLastLegalState
= mState
;
720 } else if(*src
& 0x80) {
721 mLastLegalState
= mState
;
722 mState
= mState_ERROR
;
724 mData
= JIS0212_INDEX
[*src
& 0x7F];
725 if (0xFFFD == mData
) {
726 if (mErrBehavior
== kOnError_Signal
)
728 mState
= mState_ERROR
;
730 mState
= mState_JISX0212_1990_2ndbyte
;
// Second-byte states: each looks up off = sbIdx[*src] and then sets mState
// back to the corresponding first-byte state.
735 case mState_JISX0208_1978_2ndbyte
:
737 if (CHECK_OVERRUN(dest
, destEnd
, 1))
739 uint8_t off
= sbIdx
[*src
];
741 if (mErrBehavior
== kOnError_Signal
)
743 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
745 // XXX We need to map from JIS X 0208 1983 to 1987
746 // in the next line before pass to *dest++
// Table lookup: gJapaneseMap is indexed by mData plus the second-byte offset.
747 *dest
++ = gJapaneseMap
[mData
+off
];
750 mState
= mState_JISX0208_1978
;
754 case mState_GB2312_1980_2ndbyte
:
756 if (CHECK_OVERRUN(dest
, destEnd
, 1))
758 uint8_t off
= sbIdx
[*src
];
760 if (mErrBehavior
== kOnError_Signal
)
762 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
// The delegate decoder is requested only while mGB2312Decoder is still null;
// the request uses the "gb18030" label.
764 if (!mGB2312Decoder
) {
765 // creating a delegate converter (GB2312)
767 EncodingUtils::DecoderForEncoding("gb18030");
769 if (!mGB2312Decoder
) {// failed creating a delegate converter
774 int32_t gbLen
= 2, uniLen
= 1;
775 // ((mData/94)+0x21) is the original 1st byte.
776 // *src is the present 2nd byte.
777 // Put 2 bytes (one character) to gb[] with GB2312 encoding.
778 gb
[0] = ((mData
/ 94) + 0x21) | 0x80;
780 // Convert GB2312 to unicode.
781 mGB2312Decoder
->Convert((const char *)gb
, &gbLen
,
787 mState
= mState_GB2312_1980
;
791 case mState_JISX0208_1983_2ndbyte
:
793 if (CHECK_OVERRUN(dest
, destEnd
, 1))
795 uint8_t off
= sbIdx
[*src
];
797 if (mErrBehavior
== kOnError_Signal
)
799 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
801 *dest
++ = gJapaneseMap
[mData
+off
];
804 mState
= mState_JISX0208_1983
;
808 case mState_KSC5601_1987_2ndbyte
:
810 if (CHECK_OVERRUN(dest
, destEnd
, 1))
812 uint8_t off
= sbIdx
[*src
];
814 if (mErrBehavior
== kOnError_Signal
)
816 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
// The delegate decoder is requested only while mEUCKRDecoder is still null.
818 if (!mEUCKRDecoder
) {
819 // creating a delegate converter (EUC-KR)
821 EncodingUtils::DecoderForEncoding(NS_LITERAL_CSTRING("EUC-KR"));
823 if (!mEUCKRDecoder
) {// failed creating a delegate converter
// Two-byte scratch buffer handed to the delegate decoder.
826 unsigned char ksc
[2];
828 int32_t kscLen
= 2, uniLen
= 1;
829 // ((mData/94)+0x21) is the original 1st byte.
830 // *src is the present 2nd byte.
831 // Put 2 bytes (one character) to ksc[] with EUC-KR encoding.
832 ksc
[0] = ((mData
/ 94) + 0x21) | 0x80;
833 ksc
[1] = *src
| 0x80;
834 // Convert EUC-KR to unicode.
835 mEUCKRDecoder
->Convert((const char *)ksc
, &kscLen
,
841 mState
= mState_KSC5601_1987
;
845 case mState_JISX0212_1990_2ndbyte
:
847 uint8_t off
= sbIdx
[*src
];
848 if (CHECK_OVERRUN(dest
, destEnd
, 1))
851 if (mErrBehavior
== kOnError_Signal
)
853 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
855 *dest
++ = gJapaneseMap
[mData
+off
];
858 mState
= mState_JISX0212_1990
;
862 case mState_ESC_2e
: // ESC .
863 // "ESC ." will designate 96 character set to G2.
864 mState
= mLastLegalState
;
// G2charset records which set was designated; 'F' selects G2_ISO88597.
866 G2charset
= G2_ISO88591
;
867 } else if ('F' == *src
) {
868 G2charset
= G2_ISO88597
;
870 if (CHECK_OVERRUN(dest
, destEnd
, 3))
// The bytes 0x1b and '.' are written to the output as literal code units.
872 *dest
++ = (char16_t
) 0x1b;
873 *dest
++ = (char16_t
) '.';
875 if (mErrBehavior
== kOnError_Signal
)
877 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
879 *dest
++ = (char16_t
) *src
;
884 case mState_ESC_4e
: // ESC N
885 // "ESC N" is the SS2 sequence, which invokes a G2-designated
886 // character set. Since SS2 is effective only for next one
887 // character, mState should be returned to the last status.
888 mState
= mLastLegalState
;
889 if((0x20 <= *src
) && (*src
<= 0x7F)) {
890 if (CHECK_OVERRUN(dest
, destEnd
, 1))
// With G2_ISO88591 designated, the byte with its high bit set is written
// directly as the output code unit.
892 if (G2_ISO88591
== G2charset
) {
893 *dest
++ = *src
| 0x80;
894 } else if (G2_ISO88597
== G2charset
) {
// The delegate decoder is requested only while mISO88597Decoder is still null.
895 if (!mISO88597Decoder
) {
896 // creating a delegate converter (ISO-8859-7)
898 EncodingUtils::DecoderForEncoding(NS_LITERAL_CSTRING("ISO-8859-7"));
900 if (!mISO88597Decoder
) {// failed creating a delegate converter
903 // Put one character with ISO-8859-7 encoding.
904 unsigned char gr
= *src
| 0x80;
906 int32_t grLen
= 1, uniLen
= 1;
907 // Convert ISO-8859-7 to unicode.
908 mISO88597Decoder
->Convert((const char *)&gr
, &grLen
,
912 } else {// G2charset is G2_unknown (not designated yet)
913 if (mErrBehavior
== kOnError_Signal
)
915 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
919 if (CHECK_OVERRUN(dest
, destEnd
, 3))
// The bytes 0x1b and 'N' are written to the output as literal code units.
921 *dest
++ = (char16_t
) 0x1b;
922 *dest
++ = (char16_t
) 'N';
924 if (mErrBehavior
== kOnError_Signal
)
926 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
928 *dest
++ = (char16_t
) *src
;
934 mState
= mLastLegalState
;
935 if (mErrBehavior
== kOnError_Signal
) {
939 if (CHECK_OVERRUN(dest
, destEnd
, 1))
941 *dest
++ = UNICODE_REPLACEMENT_CHARACTER
;
// Exit bookkeeping: every exit stores the count of char16_t units written in
// *aDestLen; the three returning exits below also store the count of source
// bytes consumed in *aSrcLen before returning their status code.
948 *aDestLen
= dest
- aDest
;
951 *aDestLen
= dest
- aDest
;
952 *aSrcLen
= src
- (const unsigned char*)aSrc
;
953 return NS_OK_UDEC_MOREOUTPUT
;
955 *aDestLen
= dest
- aDest
;
956 *aSrcLen
= src
- (const unsigned char*)aSrc
;
957 return NS_ERROR_UNEXPECTED
;
959 *aDestLen
= dest
- aDest
;
960 *aSrcLen
= src
- (const unsigned char*)aSrc
;
961 return NS_ERROR_ILLEGAL_INPUT
;