Fix typo in 9b54bd30006c008b4a951331b273613d5bac3abf
[pm.git] / intl / uconv / ucvja / nsJapaneseToUnicode.cpp
blob6e7febd8a758551d43ef65469aacd79db8cfd52c
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "nsJapaneseToUnicode.h"
7 #include "nsUCSupport.h"
9 #include "japanese.map"
11 #include "mozilla/Assertions.h"
12 #include "mozilla/dom/EncodingUtils.h"
14 using mozilla::dom::EncodingUtils;
16 // HTML5 says to use Windows-31J instead of the real Shift_JIS for decoding
// Lead-byte table consulted by the Shift_JIS decoder in this file; it is
// indexed with (lead byte & 0x7F).
17 #define SJIS_INDEX gCP932Index[0]
// Lead-byte table consulted by the EUC-JP and ISO-2022-JP decoders in this
// file for JIS X 0208 sequences; indexed with (lead byte & 0x7F).
18 #define JIS0208_INDEX gCP932Index[1]
// Lead-byte table consulted for JIS X 0212 sequences; indexed the same way.
20 #define JIS0212_INDEX gJIS0212Index
// Code unit the Shift_JIS decoder substitutes when the mapping table yields
// 0xfffd for a structurally valid two-byte sequence (non-signalling mode).
21 #define SJIS_UNMAPPED 0x30fb
// Code unit emitted for malformed input when errors are not signalled.
22 #define UNICODE_REPLACEMENT_CHARACTER 0xfffd
// True when byte b lies in the inclusive range 0xa1..0xfe. The casts to
// uint8_t make the comparison independent of the signedness of b.
23 #define IN_GR_RANGE(b) \
24 ((uint8_t(0xa1) <= uint8_t(b)) && (uint8_t(b) <= uint8_t(0xfe)))
// Decodes Shift_JIS bytes from aSrc into UTF-16 code units in aDest.
//
// The decoder is a three-state machine driven by mState:
//   0 - expecting a single byte or a lead byte
//   1 - expecting the trail byte of a table-mapped two-byte character
//   2 - expecting the trail byte of an EUDC (0xE000..0xEFFF) character
// mState and mData are not reset on entry, so a sequence whose lead byte
// ended one call is completed by the first byte of the next call.
//
// aSrc / aSrcLen   - input bytes and their count. *aSrcLen is rewritten with
//                    the number of bytes consumed only on the
//                    NS_ERROR_ILLEGAL_INPUT and NS_OK_UDEC_MOREOUTPUT exits.
// aDest / aDestLen - output buffer and its capacity in char16_t units; on
//                    every exit *aDestLen receives the number written.
//
// Returns NS_OK when the input was fully processed, NS_OK_UDEC_MOREOUTPUT when
// the output buffer filled up first, and NS_ERROR_ILLEGAL_INPUT when an
// invalid sequence is met while mErrBehavior == kOnError_Signal.
//
// NOTE(review): every state stores to *dest before comparing dest with
// destEnd, so the code appears to assume at least one free slot on entry;
// a call with *aDestLen == 0 and non-empty input would store past aDest.
// Confirm that callers never pass a zero-capacity buffer.
26 NS_IMETHODIMP nsShiftJISToUnicode::Convert(
27 const char * aSrc, int32_t * aSrcLen,
28 char16_t * aDest, int32_t * aDestLen)
// Trail-byte table: 0x40..0x7E map to offsets 0..62 and 0x80..0xFC map to
// offsets 63..187; every other byte value maps to 0xFF, meaning "not a valid
// trail byte".
30 static const uint8_t sbIdx[256] =
32 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x00 */
33 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x08 */
34 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x10 */
35 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x18 */
36 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x20 */
37 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x28 */
38 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x30 */
39 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38 */
40 0, 1, 2, 3, 4, 5, 6, 7, /* 0x40 */
41 8, 9, 10, 11, 12, 13, 14, 15, /* 0x48 */
42 16, 17, 18, 19, 20, 21, 22, 23, /* 0x50 */
43 24, 25, 26, 27, 28, 29, 30, 31, /* 0x58 */
44 32, 33, 34, 35, 36, 37, 38, 39, /* 0x60 */
45 40, 41, 42, 43, 44, 45, 46, 47, /* 0x68 */
46 48, 49, 50, 51, 52, 53, 54, 55, /* 0x70 */
47 56, 57, 58, 59, 60, 61, 62, 0xFF, /* 0x78 */
48 63, 64, 65, 66, 67, 68, 69, 70, /* 0x80 */
49 71, 72, 73, 74, 75, 76, 77, 78, /* 0x88 */
50 79, 80, 81, 82, 83, 84, 85, 86, /* 0x90 */
51 87, 88, 89, 90, 91, 92, 93, 94, /* 0x98 */
52 95, 96, 97, 98, 99, 100, 101, 102, /* 0xa0 */
53 103, 104, 105, 106, 107, 108, 109, 110, /* 0xa8 */
54 111, 112, 113, 114, 115, 116, 117, 118, /* 0xb0 */
55 119, 120, 121, 122, 123, 124, 125, 126, /* 0xb8 */
56 127, 128, 129, 130, 131, 132, 133, 134, /* 0xc0 */
57 135, 136, 137, 138, 139, 140, 141, 142, /* 0xc8 */
58 143, 144, 145, 146, 147, 148, 149, 150, /* 0xd0 */
59 151, 152, 153, 154, 155, 156, 157, 158, /* 0xd8 */
60 159, 160, 161, 162, 163, 164, 165, 166, /* 0xe0 */
61 167, 168, 169, 170, 171, 172, 173, 174, /* 0xe8 */
62 175, 176, 177, 178, 179, 180, 181, 182, /* 0xf0 */
63 183, 184, 185, 186, 187, 0xFF, 0xFF, 0xFF, /* 0xf8 */
// Work on unsigned bytes so comparisons and table indexing are well defined.
66 const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
67 const unsigned char* src =(unsigned char*) aSrc;
68 char16_t* destEnd = aDest + *aDestLen;
69 char16_t* dest = aDest;
70 while (src < srcEnd) {
71 switch (mState) {
// State 0: not inside a multi-byte sequence.
72 case 0:
// Bytes 0x00..0x80 (0x80 included) are copied through unchanged.
73 if (*src <= 0x80) {
74 // ASCII
75 *dest++ = (char16_t) *src;
76 if (dest >= destEnd) {
77 goto error1;
79 } else {
// The lead-byte table classifies the byte by the magnitude of its entry:
// below 0xE000 it is a base index into gJapaneseMap, 0xE000..0xEFFF is
// the base code unit of an EUDC row, anything else is emitted as-is.
80 mData = SJIS_INDEX[*src & 0x7F];
81 if (mData < 0xE000) {
82 mState = 1; // two bytes
83 } else if (mData < 0xF000) {
84 mState = 2; // EUDC
85 } else {
86 *dest++ = mData; // JIS 0201
87 if (dest >= destEnd) {
88 goto error1;
92 break;
// State 1: trail byte of a table-mapped character.
94 case 1: // Index to table
96 MOZ_ASSERT(mData < 0xE000);
97 uint8_t off = sbIdx[*src];
99 // Error handling: in the case where the second octet is not in the
100 // valid ranges 0x40-0x7E 0x80-0xFC, unconsume the invalid octet and
101 // interpret it as the ASCII value. In the case where the second
102 // octet is in the valid range but there is no mapping for the
103 // 2-octet sequence, do not unconsume.
104 if(0xFF == off) {
// Step back so the loop's src++ presents this byte again in state 0.
// On the signalling path the jump happens after this step back and before
// mState is reset below.
105 src--;
106 if (mErrBehavior == kOnError_Signal)
107 goto error_invalidchar;
108 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
109 } else {
110 char16_t ch = gJapaneseMap[mData+off];
// A table entry of 0xfffd marks an unassigned code point.
111 if(ch == 0xfffd) {
112 if (mErrBehavior == kOnError_Signal)
113 goto error_invalidchar;
114 ch = SJIS_UNMAPPED;
116 *dest++ = ch;
118 mState = 0;
119 if(dest >= destEnd)
120 goto error1;
122 break;
// State 2: trail byte of an EUDC character; the output code unit is the
// row base held in mData plus the trail-byte offset.
124 case 2: // EUDC
126 MOZ_ASSERT(0xE000 <= mData && mData < 0xF000);
127 uint8_t off = sbIdx[*src];
129 // Error handling as in case 1
130 if(0xFF == off) {
131 src--;
132 if (mErrBehavior == kOnError_Signal)
133 goto error_invalidchar;
135 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
136 } else {
137 *dest++ = mData + off;
139 mState = 0;
140 if(dest >= destEnd)
141 goto error1;
143 break;
// Advance to the next input byte (skipped by the goto exits).
146 src++;
// Normal exit: all input examined. *aSrcLen is left as passed in.
148 *aDestLen = dest - aDest;
149 return NS_OK;
// Signalled malformed input: report how far both buffers were advanced.
150 error_invalidchar:
151 *aDestLen = dest - aDest;
152 *aSrcLen = src - (const unsigned char*)aSrc;
153 return NS_ERROR_ILLEGAL_INPUT;
// Output buffer became full right after a store.
154 error1:
155 *aDestLen = dest - aDest;
// The goto bypassed the loop's src++, so account for the byte just handled.
156 src++;
// If that was also the last input byte and no sequence is pending, the
// whole input was converted and nothing more is needed.
157 if ((mState == 0) && (src == srcEnd)) {
158 return NS_OK;
160 *aSrcLen = src - (const unsigned char*)aSrc;
161 return NS_OK_UDEC_MOREOUTPUT;
164 char16_t
165 nsShiftJISToUnicode::GetCharacterForUnMapped()
167 return char16_t(SJIS_UNMAPPED);
// Decodes EUC-JP bytes from aSrc into UTF-16 code units in aDest.
//
// States held in mState:
//   0 - expecting a single byte or a lead byte
//   1 - expecting the trail byte of a JIS X 0208 character
//   2 - expecting the byte that follows 0x8E (JIS X 0201 kana)
//   3 - expecting the first JIS X 0212 byte that follows 0x8F
//   4 - expecting the second JIS X 0212 byte
//   5 - the first JIS X 0212 byte had no table entry; next byte decides
//       how much to discard
// mState and mData are not reset on entry, so sequences may span calls.
//
// aSrc / aSrcLen   - input bytes and their count. *aSrcLen is rewritten with
//                    the number of bytes consumed only on the
//                    NS_ERROR_ILLEGAL_INPUT and NS_OK_UDEC_MOREOUTPUT exits.
// aDest / aDestLen - output buffer and its capacity in char16_t units; on
//                    every exit *aDestLen receives the number written.
//
// Returns NS_OK, NS_OK_UDEC_MOREOUTPUT (output full with work remaining) or
// NS_ERROR_ILLEGAL_INPUT (malformed input with mErrBehavior ==
// kOnError_Signal).
//
// NOTE(review): as each state stores to *dest before comparing with destEnd,
// the code appears to assume at least one free output slot on entry; confirm
// callers never pass *aDestLen == 0 with non-empty input.
170 NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
171 const char * aSrc, int32_t * aSrcLen,
172 char16_t * aDest, int32_t * aDestLen)
// Trail-byte table: 0xA1..0xFE map to offsets 0..93; all other byte values
// map to 0xFF, meaning "not a valid trail byte".
174 static const uint8_t sbIdx[256] =
176 /* 0x0X */
177 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
178 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
179 /* 0x1X */
180 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
181 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
182 /* 0x2X */
183 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
184 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
185 /* 0x3X */
186 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
187 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
188 /* 0x4X */
189 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
190 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
191 /* 0x5X */
192 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
193 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
194 /* 0x6X */
195 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
196 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
197 /* 0x7X */
198 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
199 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
200 /* 0x8X */
201 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
202 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
203 /* 0x9X */
204 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
205 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
206 /* 0xAX */
207 0xFF, 0, 1, 2, 3, 4, 5, 6,
208 7, 8 , 9, 10, 11, 12, 13, 14,
209 /* 0xBX */
210 15, 16, 17, 18, 19, 20, 21, 22,
211 23, 24, 25, 26, 27, 28, 29, 30,
212 /* 0xCX */
213 31, 32, 33, 34, 35, 36, 37, 38,
214 39, 40, 41, 42, 43, 44, 45, 46,
215 /* 0xDX */
216 47, 48, 49, 50, 51, 52, 53, 54,
217 55, 56, 57, 58, 59, 60, 61, 62,
218 /* 0xEX */
219 63, 64, 65, 66, 67, 68, 69, 70,
220 71, 72, 73, 74, 75, 76, 77, 78,
221 /* 0xFX */
222 79, 80, 81, 82, 83, 84, 85, 86,
223 87, 88, 89, 90, 91, 92, 93, 0xFF,
// Work on unsigned bytes so comparisons and table indexing are well defined.
226 const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
227 const unsigned char* src =(unsigned char*) aSrc;
228 char16_t* destEnd = aDest + *aDestLen;
229 char16_t* dest = aDest;
230 while((src < srcEnd))
232 switch(mState)
// State 0: not inside a multi-byte sequence.
234 case 0:
// High-bit bytes other than 0xA0 are lead-byte candidates; 0xA0 and all
// 7-bit bytes take the pass-through branch further down.
235 if(*src & 0x80 && *src != (unsigned char)0xa0)
237 mData = JIS0208_INDEX[*src & 0x7F];
// A table entry other than 0xFFFD is the base index of a JIS X 0208 row.
238 if(mData != 0xFFFD )
240 mState = 1; // two byte JIS0208
241 } else {
// Not a JIS X 0208 lead byte: check for the two single-shift prefixes.
242 if( 0x8e == *src) {
243 // JIS 0201
244 mState = 2; // JIS0201
245 } else if(0x8f == *src) {
246 // JIS 0212
247 mState = 3; // JIS0212
248 } else {
249 // others
250 if (mErrBehavior == kOnError_Signal)
251 goto error_invalidchar;
252 *dest++ = 0xFFFD;
253 if(dest >= destEnd)
254 goto error1;
257 } else {
258 // ASCII
259 *dest++ = (char16_t) *src;
260 if(dest >= destEnd)
261 goto error1;
263 break;
// State 1: trail byte of a JIS X 0208 character.
265 case 1: // Index to table
267 uint8_t off = sbIdx[*src];
268 if(0xFF == off) {
269 if (mErrBehavior == kOnError_Signal)
270 goto error_invalidchar;
271 *dest++ = 0xFFFD;
272 // if the first byte is valid for EUC-JP but the second
273 // is not while being a valid US-ASCII, save it
274 // instead of eating it up !
// Stepping back makes the loop's src++ present this byte again in state 0.
275 if ( (uint8_t)*src < (uint8_t)0x7f )
276 --src;
277 } else {
278 *dest++ = gJapaneseMap[mData+off];
280 mState = 0;
281 if(dest >= destEnd)
282 goto error1;
284 break;
// State 2: byte following 0x8E; 0xA1..0xDF map linearly onto U+FF61 upward.
286 case 2: // JIS 0201
288 if((0xA1 <= *src) && (*src <= 0xDF)) {
289 *dest++ = (0xFF61-0x00A1) + *src;
290 } else {
291 if (mErrBehavior == kOnError_Signal)
292 goto error_invalidchar;
293 *dest++ = 0xFFFD;
294 // if 0x8e is not followed by a valid JIS X 0201 byte
295 // but by a valid US-ASCII, save it instead of eating it up.
296 if ( (uint8_t)*src < (uint8_t)0x7f )
297 --src;
299 mState = 0;
300 if(dest >= destEnd)
301 goto error1;
303 break;
// State 3: first byte of a JIS X 0212 character (after 0x8F).
305 case 3: // JIS 0212
307 if (IN_GR_RANGE(*src))
309 mData = JIS0212_INDEX[*src & 0x7F];
310 if(mData != 0xFFFD )
312 mState = 4;
313 } else {
// In range but no table row: defer the error until the next byte is seen.
314 mState = 5; // error
316 } else {
317 // First "JIS 0212" byte is not in the valid GR range: save it
318 if (mErrBehavior == kOnError_Signal)
319 goto error_invalidchar;
320 *dest++ = 0xFFFD;
321 --src;
322 mState = 0;
323 if(dest >= destEnd)
324 goto error1;
327 break;
// State 4: second byte of a JIS X 0212 character. A valid trail byte emits
// the mapped code unit; an invalid one is handled by the state 5 code.
328 case 4:
330 uint8_t off = sbIdx[*src];
331 if(0xFF != off) {
332 *dest++ = gJapaneseMap[mData+off];
333 mState = 0;
334 if(dest >= destEnd)
335 goto error1;
336 break;
338 // else fall through to error handler
// State 5: emit one replacement for an undefined JIS X 0212 sequence.
340 case 5: // two bytes undefined
342 if (mErrBehavior == kOnError_Signal)
343 goto error_invalidchar;
344 *dest++ = 0xFFFD;
345 // Undefined JIS 0212 two byte sequence. If the second byte is in
346 // the valid range for a two byte sequence (0xa1 - 0xfe) consume
347 // both bytes. Otherwise resynchronize on the second byte.
348 if (!IN_GR_RANGE(*src))
349 --src;
350 mState = 0;
351 if(dest >= destEnd)
352 goto error1;
354 break;
// Advance to the next input byte (skipped by the goto exits).
356 src++;
// Normal exit: all input examined. *aSrcLen is left as passed in.
358 *aDestLen = dest - aDest;
359 return NS_OK;
// Signalled malformed input: report how far both buffers were advanced.
360 error_invalidchar:
361 *aDestLen = dest - aDest;
362 *aSrcLen = src - (const unsigned char*)aSrc;
363 return NS_ERROR_ILLEGAL_INPUT;
// Output buffer became full right after a store.
364 error1:
365 *aDestLen = dest - aDest;
// The goto bypassed the loop's src++, so account for the byte just handled.
366 src++;
// If that was also the last input byte and no sequence is pending, the
// whole input was converted and nothing more is needed.
367 if ((mState == 0) && (src == srcEnd)) {
368 return NS_OK;
370 *aSrcLen = src - (const unsigned char*)aSrc;
371 return NS_OK_UDEC_MOREOUTPUT;
// Decodes ISO-2022-JP bytes (plus the ISO-2022-JP-2 escapes handled below)
// from aSrc into UTF-16 code units in aDest.
//
// mState selects the active character set or the position inside an escape
// sequence; mLastLegalState remembers the set to return to after an escape
// or an error; mRunLength counts characters produced since the last
// designation. None of these is reset on entry, so decoding may resume
// across calls.
//
// aSrc / aSrcLen   - input bytes and their count. *aSrcLen is rewritten with
//                    the number of bytes consumed on the three error exits
//                    only.
// aDest / aDestLen - output buffer and its capacity in char16_t units; on
//                    every exit *aDestLen receives the number written.
//
// Returns NS_OK when all input was examined, NS_OK_UDEC_MOREOUTPUT (error1)
// when the output buffer lacks room, NS_ERROR_UNEXPECTED (error2) when a
// delegate decoder cannot be created, and NS_ERROR_ILLEGAL_INPUT (error3)
// for malformed input while mErrBehavior == kOnError_Signal.
//
// CHECK_OVERRUN is defined outside this view; from its use here it
// presumably tests whether fewer than the given number of slots remain
// between dest and destEnd - TODO confirm against its definition.
376 NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
377 const char * aSrc, int32_t * aSrcLen,
378 char16_t * aDest, int32_t * aDestLen)
// First-byte table for the GB2312 and KS C 5601 states. It is indexed with
// (*src & 0x7F) on 7-bit input, so the row labels below name the 8-bit form
// of each index: 7-bit bytes 0x21..0x7E yield 94 * (byte - 0x21), every
// other index yields 0xFFFD (invalid).
380 static const uint16_t fbIdx[128] =
382 /* 0x8X */
383 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
384 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
385 /* 0x9X */
386 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
387 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
388 /* 0xAX */
389 0xFFFD, 0, 94, 94* 2, 94* 3, 94* 4, 94* 5, 94* 6,
390 94* 7, 94* 8 , 94* 9, 94*10, 94*11, 94*12, 94*13, 94*14,
391 /* 0xBX */
392 94*15, 94*16, 94*17, 94*18, 94*19, 94*20, 94*21, 94*22,
393 94*23, 94*24, 94*25, 94*26, 94*27, 94*28, 94*29, 94*30,
394 /* 0xCX */
395 94*31, 94*32, 94*33, 94*34, 94*35, 94*36, 94*37, 94*38,
396 94*39, 94*40, 94*41, 94*42, 94*43, 94*44, 94*45, 94*46,
397 /* 0xDX */
398 94*47, 94*48, 94*49, 94*50, 94*51, 94*52, 94*53, 94*54,
399 94*55, 94*56, 94*57, 94*58, 94*59, 94*60, 94*61, 94*62,
400 /* 0xEX */
401 94*63, 94*64, 94*65, 94*66, 94*67, 94*68, 94*69, 94*70,
402 94*71, 94*72, 94*73, 94*74, 94*75, 94*76, 94*77, 94*78,
403 /* 0xFX */
404 94*79, 94*80, 94*81, 94*82, 94*83, 94*84, 94*85, 94*86,
405 94*87, 94*88, 94*89, 94*90, 94*91, 94*92, 94*93, 0xFFFD,
// Second-byte table: 0x21..0x7E map to offsets 0..93; every other byte value
// maps to 0xFF, meaning "not a valid second byte".
407 static const uint8_t sbIdx[256] =
409 /* 0x0X */
410 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
411 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
412 /* 0x1X */
413 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
414 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
415 /* 0x2X */
416 0xFF, 0, 1, 2, 3, 4, 5, 6,
417 7, 8 , 9, 10, 11, 12, 13, 14,
418 /* 0x3X */
419 15, 16, 17, 18, 19, 20, 21, 22,
420 23, 24, 25, 26, 27, 28, 29, 30,
421 /* 0x4X */
422 31, 32, 33, 34, 35, 36, 37, 38,
423 39, 40, 41, 42, 43, 44, 45, 46,
424 /* 0x5X */
425 47, 48, 49, 50, 51, 52, 53, 54,
426 55, 56, 57, 58, 59, 60, 61, 62,
427 /* 0x6X */
428 63, 64, 65, 66, 67, 68, 69, 70,
429 71, 72, 73, 74, 75, 76, 77, 78,
430 /* 0x7X */
431 79, 80, 81, 82, 83, 84, 85, 86,
432 87, 88, 89, 90, 91, 92, 93, 0xFF,
433 /* 0x8X */
434 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
435 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
436 /* 0x9X */
437 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
438 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
439 /* 0xAX */
440 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
441 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
442 /* 0xBX */
443 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
444 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
445 /* 0xCX */
446 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
447 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
448 /* 0xDX */
449 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
450 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
451 /* 0xEX */
452 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
453 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
454 /* 0xFX */
455 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
456 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
// Work on unsigned bytes so comparisons and table indexing are well defined.
459 const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
460 const unsigned char* src =(unsigned char*) aSrc;
461 char16_t* destEnd = aDest + *aDestLen;
462 char16_t* dest = aDest;
463 while((src < srcEnd))
466 switch(mState)
// ASCII set: ESC (0x1b) opens an escape sequence, high-bit bytes are
// errors, everything else is copied through.
468 case mState_ASCII:
469 if(0x1b == *src)
471 mLastLegalState = mState;
472 mState = mState_ESC;
473 } else if(*src & 0x80) {
474 if (mErrBehavior == kOnError_Signal)
475 goto error3;
476 if (CHECK_OVERRUN(dest, destEnd, 1))
477 goto error1;
478 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
479 } else {
480 if (CHECK_OVERRUN(dest, destEnd, 1))
481 goto error1;
482 *dest++ = (char16_t) *src;
484 break;
// Byte after ESC: pick the escape family, or replay "ESC <byte>" literally
// and return to the previous set when the byte is not recognised.
486 case mState_ESC:
487 if( '(' == *src) {
488 mState = mState_ESC_28;
489 } else if ('$' == *src) {
490 mState = mState_ESC_24;
491 } else if ('.' == *src) { // for ISO-2022-JP-2
492 mState = mState_ESC_2e;
493 } else if ('N' == *src) { // for ISO-2022-JP-2
494 mState = mState_ESC_4e;
495 } else {
496 if (CHECK_OVERRUN(dest, destEnd, 2))
497 goto error1;
498 *dest++ = (char16_t) 0x1b;
499 if (0x80 & *src) {
500 if (mErrBehavior == kOnError_Signal)
501 goto error3;
502 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
503 } else {
504 *dest++ = (char16_t) *src;
506 mState = mLastLegalState;
508 break;
// "ESC (" family: 'B' selects ASCII, 'J' JIS X 0201 Roman, 'I' JIS X 0201
// Kana. A designation that follows another with no characters in between
// (mRunLength == 0) produces U+FFFD in the 'B' and 'J' branches.
// NOTE(review): the 'B' and 'J' branches assign mState before the
// CHECK_OVERRUN test, so on the error1 exit the final byte is reported as
// unconsumed while the state has already moved on; on resumption that byte
// would be decoded as data in the new set. Confirm whether this is intended.
510 case mState_ESC_28: // ESC (
511 if( 'B' == *src) {
512 mState = mState_ASCII;
513 if (mRunLength == 0) {
514 if (CHECK_OVERRUN(dest, destEnd, 1))
515 goto error1;
516 *dest++ = 0xFFFD;
518 mRunLength = 0;
519 } else if ('J' == *src) {
520 mState = mState_JISX0201_1976Roman;
521 if (mRunLength == 0 && mLastLegalState != mState_ASCII) {
522 if (CHECK_OVERRUN(dest, destEnd, 1))
523 goto error1;
524 if (mErrBehavior == kOnError_Signal)
525 goto error3;
526 *dest++ = 0xFFFD;
528 mRunLength = 0;
529 } else if ('I' == *src) {
530 mState = mState_JISX0201_1976Kana;
531 mRunLength = 0;
532 } else {
// Unrecognised final byte: replay the three bytes and restore the old set.
533 if (CHECK_OVERRUN(dest, destEnd, 3))
534 goto error1;
535 *dest++ = (char16_t) 0x1b;
536 *dest++ = (char16_t) '(';
537 if (0x80 & *src) {
538 if (mErrBehavior == kOnError_Signal)
539 goto error3;
540 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
541 } else {
542 *dest++ = (char16_t) *src;
544 mState = mLastLegalState;
546 break;
// "ESC $" family: '@' JIS X 0208-1978, 'A' GB 2312-1980, 'B' JIS X 0208-1983,
// '(' continues into the four-byte designations.
548 case mState_ESC_24: // ESC $
549 if( '@' == *src) {
550 mState = mState_JISX0208_1978;
551 mRunLength = 0;
552 } else if ('A' == *src) {
553 mState = mState_GB2312_1980;
554 mRunLength = 0;
555 } else if ('B' == *src) {
556 mState = mState_JISX0208_1983;
557 mRunLength = 0;
558 } else if ('(' == *src) {
559 mState = mState_ESC_24_28;
560 } else {
561 if (CHECK_OVERRUN(dest, destEnd, 3))
562 goto error1;
563 *dest++ = (char16_t) 0x1b;
564 *dest++ = (char16_t) '$';
565 if (0x80 & *src) {
566 if (mErrBehavior == kOnError_Signal)
567 goto error3;
568 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
569 } else {
570 *dest++ = (char16_t) *src;
572 mState = mLastLegalState;
574 break;
// "ESC $ (" family: 'C' KS C 5601-1987, 'D' JIS X 0212-1990.
576 case mState_ESC_24_28: // ESC $ (
577 if( 'C' == *src) {
578 mState = mState_KSC5601_1987;
579 mRunLength = 0;
580 } else if ('D' == *src) {
581 mState = mState_JISX0212_1990;
582 mRunLength = 0;
583 } else {
584 if (CHECK_OVERRUN(dest, destEnd, 4))
585 goto error1;
586 *dest++ = (char16_t) 0x1b;
587 *dest++ = (char16_t) '$';
588 *dest++ = (char16_t) '(';
589 if (0x80 & *src) {
590 if (mErrBehavior == kOnError_Signal)
591 goto error3;
592 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
593 } else {
594 *dest++ = (char16_t) *src;
596 mState = mLastLegalState;
598 break;
// JIS X 0201 Roman: handled like ASCII (bytes copied through unchanged),
// except that produced characters are counted in mRunLength.
600 case mState_JISX0201_1976Roman:
601 if(0x1b == *src) {
602 mLastLegalState = mState;
603 mState = mState_ESC;
604 } else if(*src & 0x80) {
605 if (mErrBehavior == kOnError_Signal)
606 goto error3;
607 if (CHECK_OVERRUN(dest, destEnd, 1))
608 goto error1;
609 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
610 ++mRunLength;
611 } else {
612 // XXX We need to decide how to handle \ and ~ here
613 // we may need a if statement here for '\' and '~'
614 // to map them to Yen and Overbar
615 if (CHECK_OVERRUN(dest, destEnd, 1))
616 goto error1;
617 *dest++ = (char16_t) *src;
618 ++mRunLength;
620 break;
// JIS X 0201 Kana: bytes 0x21..0x5F map linearly onto U+FF61 upward; any
// other non-ESC byte is an error.
622 case mState_JISX0201_1976Kana:
623 if(0x1b == *src) {
624 mLastLegalState = mState;
625 mState = mState_ESC;
626 } else {
627 if (CHECK_OVERRUN(dest, destEnd, 1))
628 goto error1;
629 if((0x21 <= *src) && (*src <= 0x5F)) {
630 *dest++ = (0xFF61-0x0021) + *src;
631 } else {
632 if (mErrBehavior == kOnError_Signal)
633 goto error3;
634 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
636 ++mRunLength;
638 break;
// The five first-byte states below share one shape: ESC opens an escape, a
// high-bit byte moves to mState_ERROR (which discards the following byte),
// otherwise the byte is looked up to obtain the row base kept in mData.
// NOTE(review): in each of them the 0xFFFD == mData branch enters
// mState_ERROR without refreshing mLastLegalState, unlike the high-bit
// branch; mState_ERROR then restores whatever mLastLegalState last held.
// Confirm that the stale value is acceptable.
640 case mState_JISX0208_1978:
641 if(0x1b == *src) {
642 mLastLegalState = mState;
643 mState = mState_ESC;
644 } else if(*src & 0x80) {
645 mLastLegalState = mState;
646 mState = mState_ERROR;
647 } else {
648 mData = JIS0208_INDEX[*src & 0x7F];
649 if (0xFFFD == mData) {
650 if (mErrBehavior == kOnError_Signal)
651 goto error3;
652 mState = mState_ERROR;
653 } else {
654 mState = mState_JISX0208_1978_2ndbyte;
657 break;
// GB 2312 first byte: fbIdx supplies 94 * row, later used to rebuild the
// original first byte.
659 case mState_GB2312_1980:
660 if(0x1b == *src) {
661 mLastLegalState = mState;
662 mState = mState_ESC;
663 } else if(*src & 0x80) {
664 mLastLegalState = mState;
665 mState = mState_ERROR;
666 } else {
667 mData = fbIdx[*src & 0x7F];
668 if (0xFFFD == mData) {
669 if (mErrBehavior == kOnError_Signal)
670 goto error3;
671 mState = mState_ERROR;
672 } else {
673 mState = mState_GB2312_1980_2ndbyte;
676 break;
678 case mState_JISX0208_1983:
679 if(0x1b == *src) {
680 mLastLegalState = mState;
681 mState = mState_ESC;
682 } else if(*src & 0x80) {
683 mLastLegalState = mState;
684 mState = mState_ERROR;
685 } else {
686 mData = JIS0208_INDEX[*src & 0x7F];
687 if (0xFFFD == mData) {
688 if (mErrBehavior == kOnError_Signal)
689 goto error3;
690 mState = mState_ERROR;
691 } else {
692 mState = mState_JISX0208_1983_2ndbyte;
695 break;
// KS C 5601 first byte: same scheme as GB 2312, using fbIdx.
697 case mState_KSC5601_1987:
698 if(0x1b == *src) {
699 mLastLegalState = mState;
700 mState = mState_ESC;
701 } else if(*src & 0x80) {
702 mLastLegalState = mState;
703 mState = mState_ERROR;
704 } else {
705 mData = fbIdx[*src & 0x7F];
706 if (0xFFFD == mData) {
707 if (mErrBehavior == kOnError_Signal)
708 goto error3;
709 mState = mState_ERROR;
710 } else {
711 mState = mState_KSC5601_1987_2ndbyte;
714 break;
716 case mState_JISX0212_1990:
717 if(0x1b == *src) {
718 mLastLegalState = mState;
719 mState = mState_ESC;
720 } else if(*src & 0x80) {
721 mLastLegalState = mState;
722 mState = mState_ERROR;
723 } else {
724 mData = JIS0212_INDEX[*src & 0x7F];
725 if (0xFFFD == mData) {
726 if (mErrBehavior == kOnError_Signal)
727 goto error3;
728 mState = mState_ERROR;
729 } else {
730 mState = mState_JISX0212_1990_2ndbyte;
733 break;
// Second-byte states: room is checked first, then the byte is translated
// through sbIdx; an invalid byte yields U+FFFD (or error3 when signalling),
// a valid one yields the mapped character. Each returns to its first-byte
// state afterwards.
735 case mState_JISX0208_1978_2ndbyte:
737 if (CHECK_OVERRUN(dest, destEnd, 1))
738 goto error1;
739 uint8_t off = sbIdx[*src];
740 if(0xFF == off) {
741 if (mErrBehavior == kOnError_Signal)
742 goto error3;
743 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
744 } else {
745 // XXX We need to map from JIS X 0208 1983 to 1987
746 // in the next line before pass to *dest++
747 *dest++ = gJapaneseMap[mData+off];
749 ++mRunLength;
750 mState = mState_JISX0208_1978;
752 break;
// GB 2312 second byte: the pair is re-encoded with the high bit set on both
// bytes and handed to a lazily created delegate decoder.
754 case mState_GB2312_1980_2ndbyte:
756 if (CHECK_OVERRUN(dest, destEnd, 1))
757 goto error1;
758 uint8_t off = sbIdx[*src];
759 if(0xFF == off) {
760 if (mErrBehavior == kOnError_Signal)
761 goto error3;
762 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
763 } else {
764 if (!mGB2312Decoder) {
765 // creating a delegate converter (GB2312)
766 mGB2312Decoder =
767 EncodingUtils::DecoderForEncoding("gb18030");
769 if (!mGB2312Decoder) {// failed creating a delegate converter
770 goto error2;
771 } else {
772 unsigned char gb[2];
// NOTE(review): uni has no initializer and the delegate's return value is
// not checked; if the delegate stores nothing, an indeterminate value is
// emitted. Confirm the delegate always writes one unit for this input.
773 char16_t uni;
774 int32_t gbLen = 2, uniLen = 1;
775 // ((mData/94)+0x21) is the original 1st byte.
776 // *src is the present 2nd byte.
777 // Put 2 bytes (one character) to gb[] with GB2312 encoding.
778 gb[0] = ((mData / 94) + 0x21) | 0x80;
779 gb[1] = *src | 0x80;
780 // Convert GB2312 to unicode.
781 mGB2312Decoder->Convert((const char *)gb, &gbLen,
782 &uni, &uniLen);
783 *dest++ = uni;
786 ++mRunLength;
787 mState = mState_GB2312_1980;
789 break;
791 case mState_JISX0208_1983_2ndbyte:
793 if (CHECK_OVERRUN(dest, destEnd, 1))
794 goto error1;
795 uint8_t off = sbIdx[*src];
796 if(0xFF == off) {
797 if (mErrBehavior == kOnError_Signal)
798 goto error3;
799 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
800 } else {
801 *dest++ = gJapaneseMap[mData+off];
803 ++mRunLength;
804 mState = mState_JISX0208_1983;
806 break;
// KS C 5601 second byte: same delegate scheme as GB 2312, using an EUC-KR
// decoder.
808 case mState_KSC5601_1987_2ndbyte:
810 if (CHECK_OVERRUN(dest, destEnd, 1))
811 goto error1;
812 uint8_t off = sbIdx[*src];
813 if(0xFF == off) {
814 if (mErrBehavior == kOnError_Signal)
815 goto error3;
816 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
817 } else {
818 if (!mEUCKRDecoder) {
819 // creating a delegate converter (EUC-KR)
820 mEUCKRDecoder =
821 EncodingUtils::DecoderForEncoding(NS_LITERAL_CSTRING("EUC-KR"));
823 if (!mEUCKRDecoder) {// failed creating a delegate converter
824 goto error2;
825 } else {
826 unsigned char ksc[2];
// NOTE(review): as in the GB 2312 case, uni is uninitialised and the
// delegate's result is ignored.
827 char16_t uni;
828 int32_t kscLen = 2, uniLen = 1;
829 // ((mData/94)+0x21) is the original 1st byte.
830 // *src is the present 2nd byte.
831 // Put 2 bytes (one character) to ksc[] with EUC-KR encoding.
832 ksc[0] = ((mData / 94) + 0x21) | 0x80;
833 ksc[1] = *src | 0x80;
834 // Convert EUC-KR to unicode.
835 mEUCKRDecoder->Convert((const char *)ksc, &kscLen,
836 &uni, &uniLen);
837 *dest++ = uni;
840 ++mRunLength;
841 mState = mState_KSC5601_1987;
843 break;
845 case mState_JISX0212_1990_2ndbyte:
847 uint8_t off = sbIdx[*src];
848 if (CHECK_OVERRUN(dest, destEnd, 1))
849 goto error1;
850 if(0xFF == off) {
851 if (mErrBehavior == kOnError_Signal)
852 goto error3;
853 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
854 } else {
855 *dest++ = gJapaneseMap[mData+off];
857 ++mRunLength;
858 mState = mState_JISX0212_1990;
860 break;
// NOTE(review): here, in mState_ESC_4e and in mState_ERROR, mState is
// restored before the CHECK_OVERRUN test; an error1 exit therefore leaves
// the current byte unconsumed but the escape/error state already left.
// Confirm that resuming in that condition is intended.
862 case mState_ESC_2e: // ESC .
863 // "ESC ." will designate 96 character set to G2.
864 mState = mLastLegalState;
865 if( 'A' == *src) {
866 G2charset = G2_ISO88591;
867 } else if ('F' == *src) {
868 G2charset = G2_ISO88597;
869 } else {
// Unrecognised final byte: replay the three bytes literally.
870 if (CHECK_OVERRUN(dest, destEnd, 3))
871 goto error1;
872 *dest++ = (char16_t) 0x1b;
873 *dest++ = (char16_t) '.';
874 if (0x80 & *src) {
875 if (mErrBehavior == kOnError_Signal)
876 goto error3;
877 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
878 } else {
879 *dest++ = (char16_t) *src;
882 break;
884 case mState_ESC_4e: // ESC N
885 // "ESC N" is the SS2 sequence, that invoke a G2 designated
886 // character set. Since SS2 is effective only for next one
887 // character, mState should be returned to the last status.
888 mState = mLastLegalState;
889 if((0x20 <= *src) && (*src <= 0x7F)) {
890 if (CHECK_OVERRUN(dest, destEnd, 1))
891 goto error1;
// ISO-8859-1: the code unit is simply the byte with its high bit set.
892 if (G2_ISO88591 == G2charset) {
893 *dest++ = *src | 0x80;
894 } else if (G2_ISO88597 == G2charset) {
895 if (!mISO88597Decoder) {
896 // creating a delegate converter (ISO-8859-7)
897 mISO88597Decoder =
898 EncodingUtils::DecoderForEncoding(NS_LITERAL_CSTRING("ISO-8859-7"));
900 if (!mISO88597Decoder) {// failed creating a delegate converter
901 goto error2;
902 } else {
903 // Put one character with ISO-8859-7 encoding.
904 unsigned char gr = *src | 0x80;
// NOTE(review): uni is uninitialised and the delegate's result is ignored.
905 char16_t uni;
906 int32_t grLen = 1, uniLen = 1;
907 // Convert ISO-8859-7 to unicode.
908 mISO88597Decoder->Convert((const char *)&gr, &grLen,
909 &uni, &uniLen);
910 *dest++ = uni;
912 } else {// G2charset is G2_unknown (not designated yet)
913 if (mErrBehavior == kOnError_Signal)
914 goto error3;
915 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
917 ++mRunLength;
918 } else {
// Byte outside 0x20..0x7F: replay "ESC N <byte>" literally.
919 if (CHECK_OVERRUN(dest, destEnd, 3))
920 goto error1;
921 *dest++ = (char16_t) 0x1b;
922 *dest++ = (char16_t) 'N';
923 if (0x80 & *src) {
924 if (mErrBehavior == kOnError_Signal)
925 goto error3;
926 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
927 } else {
928 *dest++ = (char16_t) *src;
931 break;
// Error state: the current byte is discarded without being inspected, the
// previous set is restored, and one U+FFFD is emitted (or error3 raised).
933 case mState_ERROR:
934 mState = mLastLegalState;
935 if (mErrBehavior == kOnError_Signal) {
936 mRunLength = 0;
937 goto error3;
939 if (CHECK_OVERRUN(dest, destEnd, 1))
940 goto error1;
941 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
942 ++mRunLength;
943 break;
945 } // switch
// Advance to the next input byte (skipped by the goto exits).
946 src++;
// Normal exit: all input examined. *aSrcLen is left as passed in.
948 *aDestLen = dest - aDest;
949 return NS_OK;
// Not enough output room. src still points at the byte being processed, so
// that byte is reported as unconsumed.
950 error1:
951 *aDestLen = dest - aDest;
952 *aSrcLen = src - (const unsigned char*)aSrc;
953 return NS_OK_UDEC_MOREOUTPUT;
// A delegate decoder could not be created.
954 error2:
955 *aDestLen = dest - aDest;
956 *aSrcLen = src - (const unsigned char*)aSrc;
957 return NS_ERROR_UNEXPECTED;
// Malformed input while errors are being signalled.
958 error3:
959 *aDestLen = dest - aDest;
960 *aSrcLen = src - (const unsigned char*)aSrc;
961 return NS_ERROR_ILLEGAL_INPUT;