1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is Mozilla Communicator client code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
37 #include "nsJapaneseToUnicode.h"
39 #include "nsUCSupport.h"
41 #include "nsIPrefBranch.h"
42 #include "nsIPrefService.h"
44 #include "japanese.map"
46 #include "nsICharsetConverterManager.h"
47 #include "nsIServiceManager.h"
// Class ID constant for the charset converter manager; it is handed to
// do_GetService() further down in this file whenever a delegate decoder
// has to be created.
48 static NS_DEFINE_CID(kCharsetConverterManagerCID
, NS_ICHARSETCONVERTERMANAGER_CID
);
// Shorthand names for the lead-byte index tables used by the converters below.
// SJIS_INDEX and JIS0208_INDEX are entries 0 and 1 of mMapIndex (which
// setMapMode() assigns); JIS0212_INDEX names gJIS0212Index directly.
50 #define SJIS_INDEX mMapIndex[0]
51 #define JIS0208_INDEX mMapIndex[1]
52 #define JIS0212_INDEX gJIS0212Index
// Chooses which set of mapping index tables mMapIndex refers to, based on the
// "intl.jis0208.map" string preference: "cp932" selects gCP932Index and
// "ibm943" selects gIBM943Index (both compared case-insensitively).  The
// function returns early if the preference service cannot be obtained or the
// preference cannot be read.
// NOTE(review): this listing omits lines (the embedded listing numbers jump
// from 54 to 60 and stop at 69), so the opening brace, the declaration of
// `res`, any assignment made before the preference lookup and the closing
// braces are not visible here -- confirm against the complete file.
54 void nsJapaneseToUnicode::setMapMode()
// Obtain the preference service; nothing more is done without it.
60 nsCOMPtr
<nsIPrefBranch
> prefBranch
= do_GetService(NS_PREFSERVICE_CONTRACTID
);
61 if (!prefBranch
) return;
// Read the preference value into prefMap; a failed read also ends the function.
62 nsXPIDLCString prefMap
;
63 res
= prefBranch
->GetCharPref("intl.jis0208.map", getter_Copies(prefMap
));
64 if (!NS_SUCCEEDED(res
)) return;
// The comparator makes both Equals() checks below ignore letter case.
65 nsCaseInsensitiveCStringComparator comparator
;
66 if ( prefMap
.Equals(NS_LITERAL_CSTRING("cp932"), comparator
) ) {
67 mMapIndex
= gCP932Index
;
68 } else if ( prefMap
.Equals(NS_LITERAL_CSTRING("ibm943"), comparator
) ) {
69 mMapIndex
= gIBM943Index
;
// Shift_JIS decoder: reads bytes from aSrc and writes PRUnichar code units
// to aDest, keeping its progress in mState / mData between bytes.
//
//   aSrc / aSrcLen   - source bytes; *aSrcLen is read as the number of bytes
//                      available and, on the exit path visible at the bottom,
//                      overwritten with the number of bytes consumed.
//   aDest / aDestLen - output buffer; *aDestLen is read as its capacity and
//                      overwritten with the number of code units written.
//
// The only return statement visible in this listing is NS_OK_UDEC_MOREOUTPUT.
// NOTE(review): this listing omits lines (the embedded listing numbers are
// not contiguous), including braces, the state switch, most case labels and
// the output-space checks.  The comments below describe only the statements
// that are visible; confirm the control flow against the complete file.
73 NS_IMETHODIMP
nsShiftJISToUnicode::Convert(
74 const char * aSrc
, PRInt32
* aSrcLen
,
75 PRUnichar
* aDest
, PRInt32
* aDestLen
)
// Offset table indexed by a byte value: 0x40-0x7E map to 0-62 and 0x80-0xFC
// map to 63-187; every other byte (0x00-0x3F, 0x7F, 0xFD-0xFF) holds 0xFF.
// The offset is added to mData further down.  Presumably 0xFF marks an
// invalid trail byte -- the test for it is not visible in this listing.
77 static const PRUint8 sbIdx
[256] =
79 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x00 */
80 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x08 */
81 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x10 */
82 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x18 */
83 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x20 */
84 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x28 */
85 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x30 */
86 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38 */
87 0, 1, 2, 3, 4, 5, 6, 7, /* 0x40 */
88 8, 9, 10, 11, 12, 13, 14, 15, /* 0x48 */
89 16, 17, 18, 19, 20, 21, 22, 23, /* 0x50 */
90 24, 25, 26, 27, 28, 29, 30, 31, /* 0x58 */
91 32, 33, 34, 35, 36, 37, 38, 39, /* 0x60 */
92 40, 41, 42, 43, 44, 45, 46, 47, /* 0x68 */
93 48, 49, 50, 51, 52, 53, 54, 55, /* 0x70 */
94 56, 57, 58, 59, 60, 61, 62, 0xFF, /* 0x78 */
95 63, 64, 65, 66, 67, 68, 69, 70, /* 0x80 */
96 71, 72, 73, 74, 75, 76, 77, 78, /* 0x88 */
97 79, 80, 81, 82, 83, 84, 85, 86, /* 0x90 */
98 87, 88, 89, 90, 91, 92, 93, 94, /* 0x98 */
99 95, 96, 97, 98, 99, 100, 101, 102, /* 0xa0 */
100 103, 104, 105, 106, 107, 108, 109, 110, /* 0xa8 */
101 111, 112, 113, 114, 115, 116, 117, 118, /* 0xb0 */
102 119, 120, 121, 122, 123, 124, 125, 126, /* 0xb8 */
103 127, 128, 129, 130, 131, 132, 133, 134, /* 0xc0 */
104 135, 136, 137, 138, 139, 140, 141, 142, /* 0xc8 */
105 143, 144, 145, 146, 147, 148, 149, 150, /* 0xd0 */
106 151, 152, 153, 154, 155, 156, 157, 158, /* 0xd8 */
107 159, 160, 161, 162, 163, 164, 165, 166, /* 0xe0 */
108 167, 168, 169, 170, 171, 172, 173, 174, /* 0xe8 */
109 175, 176, 177, 178, 179, 180, 181, 182, /* 0xf0 */
110 183, 184, 185, 186, 187, 0xFF, 0xFF, 0xFF, /* 0xf8 */
// Cursor/limit pairs: src walks the input up to srcEnd, dest walks the
// output up to destEnd.  The input is viewed as unsigned bytes.
113 const unsigned char* srcEnd
= (unsigned char*)aSrc
+ *aSrcLen
;
114 const unsigned char* src
=(unsigned char*) aSrc
;
115 PRUnichar
* destEnd
= aDest
+ *aDestLen
;
116 PRUnichar
* dest
= aDest
;
// Main loop over the remaining source bytes.
117 while((src
< srcEnd
))
// Look up the current byte (low 7 bits) in the Shift_JIS index table.
// NOTE(review): the conditions that pick between the branches below are
// not visible in this listing.
125 mData
= SJIS_INDEX
[*src
& 0x7F];
128 mState
= 1; // two bytes
// An index value of 0xFFFD gets the special handling described below.
132 if(0xFFFD == mData
) {
133 // IE-compatible handling of undefined codepoints:
// Visible outputs of that handling: the byte value itself, the fixed
// value 0xf8f0, or 0xf8f1 plus the byte's distance above 0xfd.
141 *dest
++ = (PRUnichar
) *src
;
145 *dest
++ = (PRUnichar
) 0xf8f0;
151 *dest
++ = (PRUnichar
) 0xf8f1 +
152 (*src
- (unsigned char)(0xfd));
// Otherwise the index value is written out unchanged.
161 *dest
++ = mData
; // JIS 0201
// The byte is copied to the output as-is (presumably the plain ASCII
// branch -- its condition is not visible).
171 *dest
++ = (PRUnichar
) *src
;
// State 1: combine the base stored in mData with this byte's sbIdx offset
// and fetch the character from gJapaneseMap.
177 case 1: // Index to table
179 PRUint8 off
= sbIdx
[*src
];
183 PRUnichar ch
= gJapaneseMap
[mData
+off
];
// A further handler (its case label is not visible) emits mData + off
// directly, without going through gJapaneseMap.
196 PRUint8 off
= sbIdx
[*src
];
200 *dest
++ = mData
+ off
;
// Exit paths: both visible ones store the number of code units written.
211 *aDestLen
= dest
- aDest
;
214 *aDestLen
= dest
-aDest
;
// The body of this check (no pending state and all input read) is not
// visible in this listing.
216 if ((mState
== 0) && (src
== srcEnd
)) {
// Report how many source bytes were consumed before returning.
219 *aSrcLen
= src
- (const unsigned char*)aSrc
;
220 return NS_OK_UDEC_MOREOUTPUT
;
// EUC-JP decoder: reads bytes from aSrc and writes PRUnichar code units to
// aDest, keeping its progress in mState / mData between bytes.
//
//   aSrc / aSrcLen   - source bytes; *aSrcLen is read as the number of bytes
//                      available and, on the exit path visible at the bottom,
//                      overwritten with the number of bytes consumed.
//   aDest / aDestLen - output buffer; *aDestLen is read as its capacity and
//                      overwritten with the number of code units written.
//
// The only return statement visible in this listing is NS_OK_UDEC_MOREOUTPUT.
// NOTE(review): this listing omits lines (the embedded listing numbers are
// not contiguous), including braces, the state switch, most case labels and
// the output-space checks.  The comments below describe only the statements
// that are visible; confirm the control flow against the complete file.
226 NS_IMETHODIMP
nsEUCJPToUnicodeV2::Convert(
227 const char * aSrc
, PRInt32
* aSrcLen
,
228 PRUnichar
* aDest
, PRInt32
* aDestLen
)
// Offset table indexed by a byte value: 0xA1-0xFE map to 0-93; every other
// byte (0x00-0xA0 and 0xFF) holds 0xFF.  The offset is added to mData
// further down.  Presumably 0xFF marks an invalid trail byte -- the test
// for it is not visible in this listing.
230 static const PRUint8 sbIdx
[256] =
233 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
234 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
236 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
237 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
239 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
240 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
242 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
243 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
245 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
246 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
248 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
249 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
251 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
252 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
254 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
255 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
257 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
258 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
260 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
261 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
263 0xFF, 0, 1, 2, 3, 4, 5, 6,
264 7, 8 , 9, 10, 11, 12, 13, 14,
266 15, 16, 17, 18, 19, 20, 21, 22,
267 23, 24, 25, 26, 27, 28, 29, 30,
269 31, 32, 33, 34, 35, 36, 37, 38,
270 39, 40, 41, 42, 43, 44, 45, 46,
272 47, 48, 49, 50, 51, 52, 53, 54,
273 55, 56, 57, 58, 59, 60, 61, 62,
275 63, 64, 65, 66, 67, 68, 69, 70,
276 71, 72, 73, 74, 75, 76, 77, 78,
278 79, 80, 81, 82, 83, 84, 85, 86,
279 87, 88, 89, 90, 91, 92, 93, 0xFF,
// Cursor/limit pairs: src walks the input up to srcEnd, dest walks the
// output up to destEnd.  The input is viewed as unsigned bytes.
282 const unsigned char* srcEnd
= (unsigned char*)aSrc
+ *aSrcLen
;
283 const unsigned char* src
=(unsigned char*) aSrc
;
284 PRUnichar
* destEnd
= aDest
+ *aDestLen
;
285 PRUnichar
* dest
= aDest
;
// Main loop over the remaining source bytes.
286 while((src
< srcEnd
))
// A byte with the high bit set, other than 0xa0, is looked up (low 7 bits)
// in the JIS X 0208 index table.
291 if(*src
& 0x80 && *src
!= (unsigned char)0xa0)
293 mData
= JIS0208_INDEX
[*src
& 0x7F];
// Next state depends on the lead byte.  NOTE(review): the conditions
// selecting states 1 and 2 are not visible; only the 0x8f test is.
296 mState
= 1; // two byte JIS0208
300 mState
= 2; // JIS0201
301 } else if(0x8f == *src
) {
303 mState
= 3; // JIS0212
// The byte is copied to the output as-is (the branch condition is not
// visible in this listing).
313 *dest
++ = (PRUnichar
) *src
;
// State 1: second byte of a JIS X 0208 character.
319 case 1: // Index to table
321 PRUint8 off
= sbIdx
[*src
];
324 // if the first byte is valid for EUC-JP but the second
325 // is not while being a valid US-ASCII(i.e. < 0xc0), save it
326 // instead of eating it up !
// NOTE(review): `!(*src & 0xc0)` is true only for bytes below 0x40, which
// is narrower than the "< 0xc0" wording above -- confirm which is intended.
327 if ( ! (*src
& 0xc0) )
328 *dest
++ = (PRUnichar
) *src
;;
// Combine the stored base with this byte's offset and emit the mapped
// character.
330 *dest
++ = gJapaneseMap
[mData
+off
];
// Bytes 0xA1-0xDF are shifted by a constant so that 0xA1 becomes 0xFF61.
340 if((0xA1 <= *src
) && (*src
<= 0xDF)) {
341 *dest
++ = (0xFF61-0x00A1) + *src
;
344 // if 0x8e is not followed by a valid JIS X 0201 byte
345 // but by a valid US-ASCII, save it instead of eating it up.
346 if ( (PRUint8
)*src
< (PRUint8
)0x7f )
347 *dest
++ = (PRUnichar
) *src
;
// Lead byte of a JIS X 0212 character: look it up (low 7 bits) in the
// JIS X 0212 index table.
359 mData
= JIS0212_INDEX
[*src
& 0x7F];
// Trail byte: same base + offset lookup in gJapaneseMap as in state 1.
373 PRUint8 off
= sbIdx
[*src
];
377 *dest
++ = gJapaneseMap
[mData
+off
];
384 case 5: // two bytes undefined
// Exit paths: both visible ones store the number of code units written.
395 *aDestLen
= dest
- aDest
;
398 *aDestLen
= dest
-aDest
;
// The body of this check (no pending state and all input read) is not
// visible in this listing.
400 if ((mState
== 0) && (src
== srcEnd
)) {
// Report how many source bytes were consumed before returning.
403 *aSrcLen
= src
- (const unsigned char*)aSrc
;
404 return NS_OK_UDEC_MOREOUTPUT
;
// ISO-2022-JP decoder (with the ISO-2022-JP-2 additions flagged in the
// comments below): reads bytes from aSrc and writes PRUnichar code units to
// aDest.  Progress is kept in mState / mLastLegalState / mData, the G2
// designation in G2charset, and GB2312, EUC-KR and ISO-8859-7 text is
// handed to delegate decoders that are created on first use.
//
//   aSrc / aSrcLen   - source bytes; *aSrcLen is read as the number of bytes
//                      available and, on the exit paths visible at the
//                      bottom, overwritten with the number of bytes consumed.
//   aDest / aDestLen - output buffer; *aDestLen is read as its capacity and
//                      overwritten with the number of code units written.
//
// Return values visible in this listing: NS_OK_UDEC_MOREOUTPUT and
// NS_ERROR_UNEXPECTED.
// NOTE(review): this listing omits lines (the embedded listing numbers are
// not contiguous), including braces, the state switch, several case labels,
// the first condition of most if/else chains and the `goto`/`break`
// statements.  The comments below describe only the statements that are
// visible; confirm the control flow against the complete file.
409 NS_IMETHODIMP
nsISO2022JPToUnicodeV2::Convert(
410 const char * aSrc
, PRInt32
* aSrcLen
,
411 PRUnichar
* aDest
, PRInt32
* aDestLen
)
// Base-index table for the first byte of a GB2312 / KS C 5601 character:
// bytes 0x21-0x7E map to 94 * (byte - 0x21); every other 7-bit value holds
// 0xFFFD.
413 static const PRUint16 fbIdx
[128] =
416 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
417 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
419 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
420 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
422 0xFFFD, 0, 94, 94* 2, 94* 3, 94* 4, 94* 5, 94* 6,
423 94* 7, 94* 8 , 94* 9, 94*10, 94*11, 94*12, 94*13, 94*14,
425 94*15, 94*16, 94*17, 94*18, 94*19, 94*20, 94*21, 94*22,
426 94*23, 94*24, 94*25, 94*26, 94*27, 94*28, 94*29, 94*30,
428 94*31, 94*32, 94*33, 94*34, 94*35, 94*36, 94*37, 94*38,
429 94*39, 94*40, 94*41, 94*42, 94*43, 94*44, 94*45, 94*46,
431 94*47, 94*48, 94*49, 94*50, 94*51, 94*52, 94*53, 94*54,
432 94*55, 94*56, 94*57, 94*58, 94*59, 94*60, 94*61, 94*62,
434 94*63, 94*64, 94*65, 94*66, 94*67, 94*68, 94*69, 94*70,
435 94*71, 94*72, 94*73, 94*74, 94*75, 94*76, 94*77, 94*78,
437 94*79, 94*80, 94*81, 94*82, 94*83, 94*84, 94*85, 94*86,
438 94*87, 94*88, 94*89, 94*90, 94*91, 94*92, 94*93, 0xFFFD,
// Offset table for the second byte: 0x21-0x7E map to 0-93; every other byte
// (0x00-0x20 and 0x7F-0xFF) holds 0xFF.  Presumably 0xFF marks an invalid
// second byte -- the test for it is not visible in this listing.
440 static const PRUint8 sbIdx
[256] =
443 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
444 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
446 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
447 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
449 0xFF, 0, 1, 2, 3, 4, 5, 6,
450 7, 8 , 9, 10, 11, 12, 13, 14,
452 15, 16, 17, 18, 19, 20, 21, 22,
453 23, 24, 25, 26, 27, 28, 29, 30,
455 31, 32, 33, 34, 35, 36, 37, 38,
456 39, 40, 41, 42, 43, 44, 45, 46,
458 47, 48, 49, 50, 51, 52, 53, 54,
459 55, 56, 57, 58, 59, 60, 61, 62,
461 63, 64, 65, 66, 67, 68, 69, 70,
462 71, 72, 73, 74, 75, 76, 77, 78,
464 79, 80, 81, 82, 83, 84, 85, 86,
465 87, 88, 89, 90, 91, 92, 93, 0xFF,
467 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
468 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
470 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
471 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
473 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
474 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
476 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
477 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
479 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
480 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
482 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
483 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
485 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
486 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
488 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
489 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
// Cursor/limit pairs: src walks the input up to srcEnd, dest walks the
// output up to destEnd.  The input is viewed as unsigned bytes.
492 const unsigned char* srcEnd
= (unsigned char*)aSrc
+ *aSrcLen
;
493 const unsigned char* src
=(unsigned char*) aSrc
;
494 PRUnichar
* destEnd
= aDest
+ *aDestLen
;
495 PRUnichar
* dest
= aDest
;
// Main loop over the remaining source bytes.
496 while((src
< srcEnd
))
// Remember the current state so it can be restored later (the case label
// and the condition guarding this statement are not visible).
504 mLastLegalState
= mState
;
506 } else if(*src
& 0x80) {
// The byte is copied to the output as-is.
509 *dest
++ = (PRUnichar
) *src
;
// Dispatch on the byte that follows an escape: '$', '.' and 'N' are the
// visible tests; the test leading to mState_ESC_28 is not visible.
517 mState
= mState_ESC_28
;
518 } else if ('$' == *src
) {
519 mState
= mState_ESC_24
;
520 } else if ('.' == *src
) { // for ISO-2022-JP-2
521 mState
= mState_ESC_2e
;
522 } else if ('N' == *src
) { // for ISO-2022-JP-2
523 mState
= mState_ESC_4e
;
// Fallback: the ESC character (0x1b) and the current byte are written out
// literally and the saved state is restored.  The output-space test
// precedes the writes; its consequence is not visible.
525 if((dest
+2) >= destEnd
)
527 *dest
++ = (PRUnichar
) 0x1b;
530 *dest
++ = (PRUnichar
) *src
;
531 mState
= mLastLegalState
;
// After "ESC (": the next byte picks ASCII, JIS X 0201 Roman ('J') or
// JIS X 0201 Kana ('I').  The test leading to mState_ASCII is not visible.
535 case mState_ESC_28
: // ESC (
537 mState
= mState_ASCII
;
// mRunLength is consulted before an output-space check whose consequence
// is not visible in this listing.
538 if (mRunLength
== 0) {
539 if((dest
+1) >= destEnd
)
544 } else if ('J' == *src
) {
545 mState
= mState_JISX0201_1976Roman
;
546 if (mRunLength
== 0 && mLastLegalState
!= mState_ASCII
) {
547 if((dest
+1) >= destEnd
)
552 } else if ('I' == *src
) {
553 mState
= mState_JISX0201_1976Kana
;
// Fallback: ESC, '(' and the current byte are written out literally and
// the saved state is restored.
556 if((dest
+3) >= destEnd
)
558 *dest
++ = (PRUnichar
) 0x1b;
559 *dest
++ = (PRUnichar
) '(';
562 *dest
++ = (PRUnichar
) *src
;
563 mState
= mLastLegalState
;
// After "ESC $": the next byte picks JIS X 0208-1978, GB2312 ('A'),
// JIS X 0208-1983 ('B') or continues to the "ESC $ (" state.  The test
// leading to mState_JISX0208_1978 is not visible.
567 case mState_ESC_24
: // ESC $
569 mState
= mState_JISX0208_1978
;
571 } else if ('A' == *src
) {
572 mState
= mState_GB2312_1980
;
574 } else if ('B' == *src
) {
575 mState
= mState_JISX0208_1983
;
577 } else if ('(' == *src
) {
578 mState
= mState_ESC_24_28
;
// Fallback: ESC, '$' and the current byte are written out literally and
// the saved state is restored.
580 if((dest
+3) >= destEnd
)
582 *dest
++ = (PRUnichar
) 0x1b;
583 *dest
++ = (PRUnichar
) '$';
586 *dest
++ = (PRUnichar
) *src
;
587 mState
= mLastLegalState
;
// After "ESC $ (": the next byte picks KS C 5601 or JIS X 0212 ('D').  The
// test leading to mState_KSC5601_1987 is not visible.
591 case mState_ESC_24_28
: // ESC $ (
593 mState
= mState_KSC5601_1987
;
595 } else if ('D' == *src
) {
596 mState
= mState_JISX0212_1990
;
// Fallback: ESC, '$', '(' and the current byte are written out literally
// and the saved state is restored.
599 if((dest
+4) >= destEnd
)
601 *dest
++ = (PRUnichar
) 0x1b;
602 *dest
++ = (PRUnichar
) '$';
603 *dest
++ = (PRUnichar
) '(';
606 *dest
++ = (PRUnichar
) *src
;
607 mState
= mLastLegalState
;
// JIS X 0201 Roman: the visible branches save the state, test the high bit,
// or copy the byte through unchanged.
611 case mState_JISX0201_1976Roman
:
613 mLastLegalState
= mState
;
615 } else if(*src
& 0x80) {
618 // XXX We need to decide how to handle \ and ~ here
619 // we may need a if statement here for '\' and '~'
620 // to map them to Yen and Overbar
621 *dest
++ = (PRUnichar
) *src
;
// JIS X 0201 Kana: bytes 0x21-0x5F are shifted by a constant so that 0x21
// becomes 0xFF61.
628 case mState_JISX0201_1976Kana
:
630 mLastLegalState
= mState
;
633 if((0x21 <= *src
) && (*src
<= 0x5F)) {
634 *dest
++ = (0xFF61-0x0021) + *src
;
// First byte of a two-byte character, one case per character set.  Each
// visible case follows the same shape: save the state; on a byte with the
// high bit set, save the state and switch to mState_ERROR; otherwise store
// the first byte's base index in mData and move to the matching
// "_2ndbyte" state.  JIS X 0208 uses JIS0208_INDEX, JIS X 0212 uses
// JIS0212_INDEX, GB2312 and KS C 5601 use fbIdx.
644 case mState_JISX0208_1978
:
646 mLastLegalState
= mState
;
648 } else if(*src
& 0x80) {
649 mLastLegalState
= mState
;
650 mState
= mState_ERROR
;
652 mData
= JIS0208_INDEX
[*src
& 0x7F];
655 mState
= mState_JISX0208_1978_2ndbyte
;
659 case mState_GB2312_1980
:
661 mLastLegalState
= mState
;
663 } else if(*src
& 0x80) {
664 mLastLegalState
= mState
;
665 mState
= mState_ERROR
;
667 mData
= fbIdx
[*src
& 0x7F];
670 mState
= mState_GB2312_1980_2ndbyte
;
674 case mState_JISX0208_1983
:
676 mLastLegalState
= mState
;
678 } else if(*src
& 0x80) {
679 mLastLegalState
= mState
;
680 mState
= mState_ERROR
;
682 mData
= JIS0208_INDEX
[*src
& 0x7F];
685 mState
= mState_JISX0208_1983_2ndbyte
;
689 case mState_KSC5601_1987
:
691 mLastLegalState
= mState
;
693 } else if(*src
& 0x80) {
694 mLastLegalState
= mState
;
695 mState
= mState_ERROR
;
697 mData
= fbIdx
[*src
& 0x7F];
700 mState
= mState_KSC5601_1987_2ndbyte
;
704 case mState_JISX0212_1990
:
706 mLastLegalState
= mState
;
708 } else if(*src
& 0x80) {
709 mLastLegalState
= mState
;
710 mState
= mState_ERROR
;
712 mData
= JIS0212_INDEX
[*src
& 0x7F];
715 mState
= mState_JISX0212_1990_2ndbyte
;
// Second byte of a JIS X 0208-1978 character: base + offset lookup in
// gJapaneseMap, then back to the first-byte state.
719 case mState_JISX0208_1978_2ndbyte
:
721 PRUint8 off
= sbIdx
[*src
];
725 // XXX We need to map from JIS X 0208 1983 to 1987
726 // in the next line before pass to *dest++
727 *dest
++ = gJapaneseMap
[mData
+off
];
730 mState
= mState_JISX0208_1978
;
// Second byte of a GB2312 character: the pair is re-encoded with the high
// bits set and passed to a delegate GB2312 decoder.
736 case mState_GB2312_1980_2ndbyte
:
738 PRUint8 off
= sbIdx
[*src
];
// Create the delegate decoder the first time it is needed.
742 if (!mGB2312Decoder
) {
743 // creating a delegate converter (GB2312)
745 nsCOMPtr
<nsICharsetConverterManager
> ccm
=
746 do_GetService(kCharsetConverterManagerCID
, &rv
);
747 if (NS_SUCCEEDED(rv
)) {
748 rv
= ccm
->GetUnicodeDecoderRaw("GB2312", &mGB2312Decoder
);
751 if (!mGB2312Decoder
) {// failed creating a delegate converter
// Two input bytes in, room for one output code unit.
756 PRInt32 gbLen
= 2, uniLen
= 1;
757 // ((mData/94)+0x21) is the original 1st byte.
758 // *src is the present 2nd byte.
759 // Put 2 bytes (one character) to gb[] with GB2312 encoding.
760 gb
[0] = ((mData
/ 94) + 0x21) | 0x80;
762 // Convert GB2312 to unicode.
763 mGB2312Decoder
->Convert((const char *)gb
, &gbLen
,
769 mState
= mState_GB2312_1980
;
// Second byte of a JIS X 0208-1983 character: base + offset lookup in
// gJapaneseMap, then back to the first-byte state.
775 case mState_JISX0208_1983_2ndbyte
:
777 PRUint8 off
= sbIdx
[*src
];
781 *dest
++ = gJapaneseMap
[mData
+off
];
784 mState
= mState_JISX0208_1983
;
// Second byte of a KS C 5601 character: the pair is re-encoded with the
// high bits set and passed to a delegate EUC-KR decoder.
790 case mState_KSC5601_1987_2ndbyte
:
792 PRUint8 off
= sbIdx
[*src
];
// Create the delegate decoder the first time it is needed.
796 if (!mEUCKRDecoder
) {
797 // creating a delegate converter (EUC-KR)
799 nsCOMPtr
<nsICharsetConverterManager
> ccm
=
800 do_GetService(kCharsetConverterManagerCID
, &rv
);
801 if (NS_SUCCEEDED(rv
)) {
802 rv
= ccm
->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder
);
805 if (!mEUCKRDecoder
) {// failed creating a delegate converter
808 unsigned char ksc
[2];
// Two input bytes in, room for one output code unit.
810 PRInt32 kscLen
= 2, uniLen
= 1;
811 // ((mData/94)+0x21) is the original 1st byte.
812 // *src is the present 2nd byte.
813 // Put 2 bytes (one character) to ksc[] with EUC-KR encoding.
814 ksc
[0] = ((mData
/ 94) + 0x21) | 0x80;
815 ksc
[1] = *src
| 0x80;
816 // Convert EUC-KR to unicode.
817 mEUCKRDecoder
->Convert((const char *)ksc
, &kscLen
,
823 mState
= mState_KSC5601_1987
;
// Second byte of a JIS X 0212 character: base + offset lookup in
// gJapaneseMap, then back to the first-byte state.
829 case mState_JISX0212_1990_2ndbyte
:
831 PRUint8 off
= sbIdx
[*src
];
835 *dest
++ = gJapaneseMap
[mData
+off
];
838 mState
= mState_JISX0212_1990
;
// After "ESC .": record which character set is designated to G2, and go
// back to the saved state.  'F' selects ISO-8859-7; the test that selects
// ISO-8859-1 is not visible.
844 case mState_ESC_2e
: // ESC .
845 // "ESC ." will designate 96 character set to G2.
846 mState
= mLastLegalState
;
848 G2charset
= G2_ISO88591
;
849 } else if ('F' == *src
) {
850 G2charset
= G2_ISO88597
;
// Fallback: ESC, '.' and the current byte are written out literally.
852 if((dest
+3) >= destEnd
)
854 *dest
++ = (PRUnichar
) 0x1b;
855 *dest
++ = (PRUnichar
) '.';
858 *dest
++ = (PRUnichar
) *src
;
862 case mState_ESC_4e
: // ESC N
863 // "ESC N" is the SS2 sequence, that invoke a G2 designated
864 // character set. Since SS2 is effective only for next one
865 // character, mState should be returned to the last status.
866 mState
= mLastLegalState
;
// Only bytes 0x20-0x7F are interpreted through the G2 set.
867 if((0x20 <= *src
) && (*src
<= 0x7F)) {
// ISO-8859-1: setting the high bit yields the output value directly.
868 if (G2_ISO88591
== G2charset
) {
869 *dest
++ = *src
| 0x80;
// ISO-8859-7: the byte (with the high bit set) goes through a delegate
// decoder that is created the first time it is needed.
871 } else if (G2_ISO88597
== G2charset
) {
872 if (!mISO88597Decoder
) {
873 // creating a delegate converter (ISO-8859-7)
875 nsCOMPtr
<nsICharsetConverterManager
> ccm
=
876 do_GetService(kCharsetConverterManagerCID
, &rv
);
877 if (NS_SUCCEEDED(rv
)) {
878 rv
= ccm
->GetUnicodeDecoderRaw("ISO-8859-7", &mISO88597Decoder
);
881 if (!mISO88597Decoder
) {// failed creating a delegate converter
884 // Put one character with ISO-8859-7 encoding.
885 unsigned char gr
= *src
| 0x80;
// One input byte in, room for one output code unit.
887 PRInt32 grLen
= 1, uniLen
= 1;
888 // Convert ISO-8859-7 to unicode.
889 mISO88597Decoder
->Convert((const char *)&gr
, &grLen
,
894 } else {// G2charset is G2_unknown (not designated yet)
// Fallback: ESC, 'N' and the current byte are written out literally.
900 if((dest
+3) >= destEnd
)
902 *dest
++ = (PRUnichar
) 0x1b;
903 *dest
++ = (PRUnichar
) 'N';
906 *dest
++ = (PRUnichar
) *src
;
// Restore the saved state (the case this belongs to is not visible).
911 mState
= mLastLegalState
;
// Exit paths: each visible one stores the number of code units written.
919 *aDestLen
= dest
- aDest
;
922 *aDestLen
= dest
-aDest
;
// The body of this check (no pending state and all input read) is not
// visible in this listing.
924 if ((mState
== 0) && (src
== srcEnd
)) {
// Output buffer exhausted: report consumed source bytes and ask the caller
// for more output space.
927 *aSrcLen
= src
- (const unsigned char*)aSrc
;
928 return NS_OK_UDEC_MOREOUTPUT
;
// Failure exit: report both counts and return NS_ERROR_UNEXPECTED.
930 *aSrcLen
= src
- (const unsigned char*)aSrc
;
931 *aDestLen
= dest
-aDest
;
932 return NS_ERROR_UNEXPECTED
;