Bug 470455 - test_database_sync_embed_visits.js leaks, r=sdwilsh
[wine-gecko.git] / intl / uconv / ucvja / nsJapaneseToUnicode.cpp
blob629884f9f3ebb4377e385ede708930b9ae2c5735
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is Mozilla Communicator client code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
37 #include "nsJapaneseToUnicode.h"
39 #include "nsUCSupport.h"
41 #include "nsIPrefBranch.h"
42 #include "nsIPrefService.h"
44 #include "japanese.map"
46 #include "nsICharsetConverterManager.h"
47 #include "nsIServiceManager.h"
48 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
50 #define SJIS_INDEX mMapIndex[0]
51 #define JIS0208_INDEX mMapIndex[1]
52 #define JIS0212_INDEX gJIS0212Index
54 void nsJapaneseToUnicode::setMapMode()
56 nsresult res;
58 mMapIndex = gIndex;
60 nsCOMPtr<nsIPrefBranch> prefBranch = do_GetService(NS_PREFSERVICE_CONTRACTID);
61 if (!prefBranch) return;
62 nsXPIDLCString prefMap;
63 res = prefBranch->GetCharPref("intl.jis0208.map", getter_Copies(prefMap));
64 if (!NS_SUCCEEDED(res)) return;
65 nsCaseInsensitiveCStringComparator comparator;
66 if ( prefMap.Equals(NS_LITERAL_CSTRING("cp932"), comparator) ) {
67 mMapIndex = gCP932Index;
68 } else if ( prefMap.Equals(NS_LITERAL_CSTRING("ibm943"), comparator) ) {
69 mMapIndex = gIBM943Index;
73 NS_IMETHODIMP nsShiftJISToUnicode::Convert(
74 const char * aSrc, PRInt32 * aSrcLen,
75 PRUnichar * aDest, PRInt32 * aDestLen)
77 static const PRUint8 sbIdx[256] =
79 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x00 */
80 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x08 */
81 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x10 */
82 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x18 */
83 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x20 */
84 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x28 */
85 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x30 */
86 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38 */
87 0, 1, 2, 3, 4, 5, 6, 7, /* 0x40 */
88 8, 9, 10, 11, 12, 13, 14, 15, /* 0x48 */
89 16, 17, 18, 19, 20, 21, 22, 23, /* 0x50 */
90 24, 25, 26, 27, 28, 29, 30, 31, /* 0x58 */
91 32, 33, 34, 35, 36, 37, 38, 39, /* 0x60 */
92 40, 41, 42, 43, 44, 45, 46, 47, /* 0x68 */
93 48, 49, 50, 51, 52, 53, 54, 55, /* 0x70 */
94 56, 57, 58, 59, 60, 61, 62, 0xFF, /* 0x78 */
95 63, 64, 65, 66, 67, 68, 69, 70, /* 0x80 */
96 71, 72, 73, 74, 75, 76, 77, 78, /* 0x88 */
97 79, 80, 81, 82, 83, 84, 85, 86, /* 0x90 */
98 87, 88, 89, 90, 91, 92, 93, 94, /* 0x98 */
99 95, 96, 97, 98, 99, 100, 101, 102, /* 0xa0 */
100 103, 104, 105, 106, 107, 108, 109, 110, /* 0xa8 */
101 111, 112, 113, 114, 115, 116, 117, 118, /* 0xb0 */
102 119, 120, 121, 122, 123, 124, 125, 126, /* 0xb8 */
103 127, 128, 129, 130, 131, 132, 133, 134, /* 0xc0 */
104 135, 136, 137, 138, 139, 140, 141, 142, /* 0xc8 */
105 143, 144, 145, 146, 147, 148, 149, 150, /* 0xd0 */
106 151, 152, 153, 154, 155, 156, 157, 158, /* 0xd8 */
107 159, 160, 161, 162, 163, 164, 165, 166, /* 0xe0 */
108 167, 168, 169, 170, 171, 172, 173, 174, /* 0xe8 */
109 175, 176, 177, 178, 179, 180, 181, 182, /* 0xf0 */
110 183, 184, 185, 186, 187, 0xFF, 0xFF, 0xFF, /* 0xf8 */
113 const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
114 const unsigned char* src =(unsigned char*) aSrc;
115 PRUnichar* destEnd = aDest + *aDestLen;
116 PRUnichar* dest = aDest;
117 while((src < srcEnd))
119 switch(mState)
122 case 0:
123 if(*src & 0x80)
125 mData = SJIS_INDEX[*src & 0x7F];
126 if(mData < 0xE000 )
128 mState = 1; // two bytes
129 } else {
130 if( mData > 0xFF00)
132 if(0xFFFD == mData) {
133 // IE-compatible handling of undefined codepoints:
134 // 0x80 --> U+0080
135 // 0xa0 --> U+F8F0
136 // 0xfd --> U+F8F1
137 // 0xfe --> U+F8F2
138 // 0xff --> U+F8F3
139 switch (*src) {
140 case 0x80:
141 *dest++ = (PRUnichar) *src;
142 break;
144 case 0xa0:
145 *dest++ = (PRUnichar) 0xf8f0;
146 break;
148 case 0xfd:
149 case 0xfe:
150 case 0xff:
151 *dest++ = (PRUnichar) 0xf8f1 +
152 (*src - (unsigned char)(0xfd));
153 break;
155 default:
156 *dest++ = 0x30FB;
158 if(dest >= destEnd)
159 goto error1;
160 } else {
161 *dest++ = mData; // JIS 0201
162 if(dest >= destEnd)
163 goto error1;
165 } else {
166 mState = 2; // EUDC
169 } else {
170 // ASCII
171 *dest++ = (PRUnichar) *src;
172 if(dest >= destEnd)
173 goto error1;
175 break;
177 case 1: // Index to table
179 PRUint8 off = sbIdx[*src];
180 if(0xFF == off) {
181 *dest++ = 0x30FB;
182 } else {
183 PRUnichar ch = gJapaneseMap[mData+off];
184 if(ch == 0xfffd)
185 ch = 0x30fb;
186 *dest++ = ch;
188 mState = 0;
189 if(dest >= destEnd)
190 goto error1;
192 break;
194 case 2: // EUDC
196 PRUint8 off = sbIdx[*src];
197 if(0xFF == off) {
198 *dest++ = 0x30fb;
199 } else {
200 *dest++ = mData + off;
202 mState = 0;
203 if(dest >= destEnd)
204 goto error1;
206 break;
209 src++;
211 *aDestLen = dest - aDest;
212 return NS_OK;
213 error1:
214 *aDestLen = dest-aDest;
215 src++;
216 if ((mState == 0) && (src == srcEnd)) {
217 return NS_OK;
219 *aSrcLen = src - (const unsigned char*)aSrc;
220 return NS_OK_UDEC_MOREOUTPUT;
226 NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
227 const char * aSrc, PRInt32 * aSrcLen,
228 PRUnichar * aDest, PRInt32 * aDestLen)
230 static const PRUint8 sbIdx[256] =
232 /* 0x0X */
233 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
234 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
235 /* 0x1X */
236 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
237 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
238 /* 0x2X */
239 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
240 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
241 /* 0x3X */
242 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
243 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
244 /* 0x4X */
245 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
246 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
247 /* 0x5X */
248 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
249 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
250 /* 0x6X */
251 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
252 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
253 /* 0x7X */
254 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
255 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
256 /* 0x8X */
257 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
258 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
259 /* 0x9X */
260 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
261 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
262 /* 0xAX */
263 0xFF, 0, 1, 2, 3, 4, 5, 6,
264 7, 8 , 9, 10, 11, 12, 13, 14,
265 /* 0xBX */
266 15, 16, 17, 18, 19, 20, 21, 22,
267 23, 24, 25, 26, 27, 28, 29, 30,
268 /* 0xCX */
269 31, 32, 33, 34, 35, 36, 37, 38,
270 39, 40, 41, 42, 43, 44, 45, 46,
271 /* 0xDX */
272 47, 48, 49, 50, 51, 52, 53, 54,
273 55, 56, 57, 58, 59, 60, 61, 62,
274 /* 0xEX */
275 63, 64, 65, 66, 67, 68, 69, 70,
276 71, 72, 73, 74, 75, 76, 77, 78,
277 /* 0xFX */
278 79, 80, 81, 82, 83, 84, 85, 86,
279 87, 88, 89, 90, 91, 92, 93, 0xFF,
282 const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
283 const unsigned char* src =(unsigned char*) aSrc;
284 PRUnichar* destEnd = aDest + *aDestLen;
285 PRUnichar* dest = aDest;
286 while((src < srcEnd))
288 switch(mState)
290 case 0:
291 if(*src & 0x80 && *src != (unsigned char)0xa0)
293 mData = JIS0208_INDEX[*src & 0x7F];
294 if(mData != 0xFFFD )
296 mState = 1; // two byte JIS0208
297 } else {
298 if( 0x8e == *src) {
299 // JIS 0201
300 mState = 2; // JIS0201
301 } else if(0x8f == *src) {
302 // JIS 0212
303 mState = 3; // JIS0212
304 } else {
305 // others
306 *dest++ = 0xFFFD;
307 if(dest >= destEnd)
308 goto error1;
311 } else {
312 // ASCII
313 *dest++ = (PRUnichar) *src;
314 if(dest >= destEnd)
315 goto error1;
317 break;
319 case 1: // Index to table
321 PRUint8 off = sbIdx[*src];
322 if(0xFF == off) {
323 *dest++ = 0xFFFD;
324 // if the first byte is valid for EUC-JP but the second
325 // is not while being a valid US-ASCII(i.e. < 0xc0), save it
326 // instead of eating it up !
327 if ( ! (*src & 0xc0) )
328 *dest++ = (PRUnichar) *src;;
329 } else {
330 *dest++ = gJapaneseMap[mData+off];
332 mState = 0;
333 if(dest >= destEnd)
334 goto error1;
336 break;
338 case 2: // JIS 0201
340 if((0xA1 <= *src) && (*src <= 0xDF)) {
341 *dest++ = (0xFF61-0x00A1) + *src;
342 } else {
343 *dest++ = 0xFFFD;
344 // if 0x8e is not followed by a valid JIS X 0201 byte
345 // but by a valid US-ASCII, save it instead of eating it up.
346 if ( (PRUint8)*src < (PRUint8)0x7f )
347 *dest++ = (PRUnichar) *src;
349 mState = 0;
350 if(dest >= destEnd)
351 goto error1;
353 break;
355 case 3: // JIS 0212
357 if(*src & 0x80)
359 mData = JIS0212_INDEX[*src & 0x7F];
360 if(mData != 0xFFFD )
362 mState = 4;
363 } else {
364 mState = 5; // error
366 } else {
367 mState = 5; // error
370 break;
371 case 4:
373 PRUint8 off = sbIdx[*src];
374 if(0xFF == off) {
375 *dest++ = 0xFFFD;
376 } else {
377 *dest++ = gJapaneseMap[mData+off];
379 mState = 0;
380 if(dest >= destEnd)
381 goto error1;
383 break;
384 case 5: // two bytes undefined
386 *dest++ = 0xFFFD;
387 mState = 0;
388 if(dest >= destEnd)
389 goto error1;
391 break;
393 src++;
395 *aDestLen = dest - aDest;
396 return NS_OK;
397 error1:
398 *aDestLen = dest-aDest;
399 src++;
400 if ((mState == 0) && (src == srcEnd)) {
401 return NS_OK;
403 *aSrcLen = src - (const unsigned char*)aSrc;
404 return NS_OK_UDEC_MOREOUTPUT;
409 NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
410 const char * aSrc, PRInt32 * aSrcLen,
411 PRUnichar * aDest, PRInt32 * aDestLen)
413 static const PRUint16 fbIdx[128] =
415 /* 0x8X */
416 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
417 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
418 /* 0x9X */
419 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
420 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
421 /* 0xAX */
422 0xFFFD, 0, 94, 94* 2, 94* 3, 94* 4, 94* 5, 94* 6,
423 94* 7, 94* 8 , 94* 9, 94*10, 94*11, 94*12, 94*13, 94*14,
424 /* 0xBX */
425 94*15, 94*16, 94*17, 94*18, 94*19, 94*20, 94*21, 94*22,
426 94*23, 94*24, 94*25, 94*26, 94*27, 94*28, 94*29, 94*30,
427 /* 0xCX */
428 94*31, 94*32, 94*33, 94*34, 94*35, 94*36, 94*37, 94*38,
429 94*39, 94*40, 94*41, 94*42, 94*43, 94*44, 94*45, 94*46,
430 /* 0xDX */
431 94*47, 94*48, 94*49, 94*50, 94*51, 94*52, 94*53, 94*54,
432 94*55, 94*56, 94*57, 94*58, 94*59, 94*60, 94*61, 94*62,
433 /* 0xEX */
434 94*63, 94*64, 94*65, 94*66, 94*67, 94*68, 94*69, 94*70,
435 94*71, 94*72, 94*73, 94*74, 94*75, 94*76, 94*77, 94*78,
436 /* 0xFX */
437 94*79, 94*80, 94*81, 94*82, 94*83, 94*84, 94*85, 94*86,
438 94*87, 94*88, 94*89, 94*90, 94*91, 94*92, 94*93, 0xFFFD,
440 static const PRUint8 sbIdx[256] =
442 /* 0x0X */
443 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
444 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
445 /* 0x1X */
446 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
447 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
448 /* 0x2X */
449 0xFF, 0, 1, 2, 3, 4, 5, 6,
450 7, 8 , 9, 10, 11, 12, 13, 14,
451 /* 0x3X */
452 15, 16, 17, 18, 19, 20, 21, 22,
453 23, 24, 25, 26, 27, 28, 29, 30,
454 /* 0x4X */
455 31, 32, 33, 34, 35, 36, 37, 38,
456 39, 40, 41, 42, 43, 44, 45, 46,
457 /* 0x5X */
458 47, 48, 49, 50, 51, 52, 53, 54,
459 55, 56, 57, 58, 59, 60, 61, 62,
460 /* 0x6X */
461 63, 64, 65, 66, 67, 68, 69, 70,
462 71, 72, 73, 74, 75, 76, 77, 78,
463 /* 0x7X */
464 79, 80, 81, 82, 83, 84, 85, 86,
465 87, 88, 89, 90, 91, 92, 93, 0xFF,
466 /* 0x8X */
467 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
468 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
469 /* 0x9X */
470 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
471 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
472 /* 0xAX */
473 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
474 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
475 /* 0xBX */
476 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
477 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
478 /* 0xCX */
479 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
480 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
481 /* 0xDX */
482 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
483 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
484 /* 0xEX */
485 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
486 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
487 /* 0xFX */
488 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
489 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
492 const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
493 const unsigned char* src =(unsigned char*) aSrc;
494 PRUnichar* destEnd = aDest + *aDestLen;
495 PRUnichar* dest = aDest;
496 while((src < srcEnd))
499 switch(mState)
501 case mState_ASCII:
502 if(0x1b == *src)
504 mLastLegalState = mState;
505 mState = mState_ESC;
506 } else if(*src & 0x80) {
507 goto error2;
508 } else {
509 *dest++ = (PRUnichar) *src;
510 if(dest >= destEnd)
511 goto error1;
513 break;
515 case mState_ESC:
516 if( '(' == *src) {
517 mState = mState_ESC_28;
518 } else if ('$' == *src) {
519 mState = mState_ESC_24;
520 } else if ('.' == *src) { // for ISO-2022-JP-2
521 mState = mState_ESC_2e;
522 } else if ('N' == *src) { // for ISO-2022-JP-2
523 mState = mState_ESC_4e;
524 } else {
525 if((dest+2) >= destEnd)
526 goto error1;
527 *dest++ = (PRUnichar) 0x1b;
528 if(0x80 & *src)
529 goto error2;
530 *dest++ = (PRUnichar) *src;
531 mState = mLastLegalState;
533 break;
535 case mState_ESC_28: // ESC (
536 if( 'B' == *src) {
537 mState = mState_ASCII;
538 if (mRunLength == 0) {
539 if((dest+1) >= destEnd)
540 goto error1;
541 *dest++ = 0xFFFD;
543 mRunLength = 0;
544 } else if ('J' == *src) {
545 mState = mState_JISX0201_1976Roman;
546 if (mRunLength == 0 && mLastLegalState != mState_ASCII) {
547 if((dest+1) >= destEnd)
548 goto error1;
549 *dest++ = 0xFFFD;
551 mRunLength = 0;
552 } else if ('I' == *src) {
553 mState = mState_JISX0201_1976Kana;
554 mRunLength = 0;
555 } else {
556 if((dest+3) >= destEnd)
557 goto error1;
558 *dest++ = (PRUnichar) 0x1b;
559 *dest++ = (PRUnichar) '(';
560 if(0x80 & *src)
561 goto error2;
562 *dest++ = (PRUnichar) *src;
563 mState = mLastLegalState;
565 break;
567 case mState_ESC_24: // ESC $
568 if( '@' == *src) {
569 mState = mState_JISX0208_1978;
570 mRunLength = 0;
571 } else if ('A' == *src) {
572 mState = mState_GB2312_1980;
573 mRunLength = 0;
574 } else if ('B' == *src) {
575 mState = mState_JISX0208_1983;
576 mRunLength = 0;
577 } else if ('(' == *src) {
578 mState = mState_ESC_24_28;
579 } else {
580 if((dest+3) >= destEnd)
581 goto error1;
582 *dest++ = (PRUnichar) 0x1b;
583 *dest++ = (PRUnichar) '$';
584 if(0x80 & *src)
585 goto error2;
586 *dest++ = (PRUnichar) *src;
587 mState = mLastLegalState;
589 break;
591 case mState_ESC_24_28: // ESC $ (
592 if( 'C' == *src) {
593 mState = mState_KSC5601_1987;
594 mRunLength = 0;
595 } else if ('D' == *src) {
596 mState = mState_JISX0212_1990;
597 mRunLength = 0;
598 } else {
599 if((dest+4) >= destEnd)
600 goto error1;
601 *dest++ = (PRUnichar) 0x1b;
602 *dest++ = (PRUnichar) '$';
603 *dest++ = (PRUnichar) '(';
604 if(0x80 & *src)
605 goto error2;
606 *dest++ = (PRUnichar) *src;
607 mState = mLastLegalState;
609 break;
611 case mState_JISX0201_1976Roman:
612 if(0x1b == *src) {
613 mLastLegalState = mState;
614 mState = mState_ESC;
615 } else if(*src & 0x80) {
616 goto error2;
617 } else {
618 // XXX We need to decide how to handle \ and ~ here
619 // we may need a if statement here for '\' and '~'
620 // to map them to Yen and Overbar
621 *dest++ = (PRUnichar) *src;
622 ++mRunLength;
623 if(dest >= destEnd)
624 goto error1;
626 break;
628 case mState_JISX0201_1976Kana:
629 if(0x1b == *src) {
630 mLastLegalState = mState;
631 mState = mState_ESC;
632 } else {
633 if((0x21 <= *src) && (*src <= 0x5F)) {
634 *dest++ = (0xFF61-0x0021) + *src;
635 ++mRunLength;
636 } else {
637 goto error2;
639 if(dest >= destEnd)
640 goto error1;
642 break;
644 case mState_JISX0208_1978:
645 if(0x1b == *src) {
646 mLastLegalState = mState;
647 mState = mState_ESC;
648 } else if(*src & 0x80) {
649 mLastLegalState = mState;
650 mState = mState_ERROR;
651 } else {
652 mData = JIS0208_INDEX[*src & 0x7F];
653 if(0xFFFD == mData)
654 goto error2;
655 mState = mState_JISX0208_1978_2ndbyte;
657 break;
659 case mState_GB2312_1980:
660 if(0x1b == *src) {
661 mLastLegalState = mState;
662 mState = mState_ESC;
663 } else if(*src & 0x80) {
664 mLastLegalState = mState;
665 mState = mState_ERROR;
666 } else {
667 mData = fbIdx[*src & 0x7F];
668 if(0xFFFD == mData)
669 goto error2;
670 mState = mState_GB2312_1980_2ndbyte;
672 break;
674 case mState_JISX0208_1983:
675 if(0x1b == *src) {
676 mLastLegalState = mState;
677 mState = mState_ESC;
678 } else if(*src & 0x80) {
679 mLastLegalState = mState;
680 mState = mState_ERROR;
681 } else {
682 mData = JIS0208_INDEX[*src & 0x7F];
683 if(0xFFFD == mData)
684 goto error2;
685 mState = mState_JISX0208_1983_2ndbyte;
687 break;
689 case mState_KSC5601_1987:
690 if(0x1b == *src) {
691 mLastLegalState = mState;
692 mState = mState_ESC;
693 } else if(*src & 0x80) {
694 mLastLegalState = mState;
695 mState = mState_ERROR;
696 } else {
697 mData = fbIdx[*src & 0x7F];
698 if(0xFFFD == mData)
699 goto error2;
700 mState = mState_KSC5601_1987_2ndbyte;
702 break;
704 case mState_JISX0212_1990:
705 if(0x1b == *src) {
706 mLastLegalState = mState;
707 mState = mState_ESC;
708 } else if(*src & 0x80) {
709 mLastLegalState = mState;
710 mState = mState_ERROR;
711 } else {
712 mData = JIS0212_INDEX[*src & 0x7F];
713 if(0xFFFD == mData)
714 goto error2;
715 mState = mState_JISX0212_1990_2ndbyte;
717 break;
719 case mState_JISX0208_1978_2ndbyte:
721 PRUint8 off = sbIdx[*src];
722 if(0xFF == off) {
723 goto error2;
724 } else {
725 // XXX We need to map from JIS X 0208 1983 to 1987
726 // in the next line before pass to *dest++
727 *dest++ = gJapaneseMap[mData+off];
728 ++mRunLength;
730 mState = mState_JISX0208_1978;
731 if(dest >= destEnd)
732 goto error1;
734 break;
736 case mState_GB2312_1980_2ndbyte:
738 PRUint8 off = sbIdx[*src];
739 if(0xFF == off) {
740 goto error2;
741 } else {
742 if (!mGB2312Decoder) {
743 // creating a delegate converter (GB2312)
744 nsresult rv;
745 nsCOMPtr<nsICharsetConverterManager> ccm =
746 do_GetService(kCharsetConverterManagerCID, &rv);
747 if (NS_SUCCEEDED(rv)) {
748 rv = ccm->GetUnicodeDecoderRaw("GB2312", &mGB2312Decoder);
751 if (!mGB2312Decoder) {// failed creating a delegate converter
752 goto error2;
753 } else {
754 unsigned char gb[2];
755 PRUnichar uni;
756 PRInt32 gbLen = 2, uniLen = 1;
757 // ((mData/94)+0x21) is the original 1st byte.
758 // *src is the present 2nd byte.
759 // Put 2 bytes (one character) to gb[] with GB2312 encoding.
760 gb[0] = ((mData / 94) + 0x21) | 0x80;
761 gb[1] = *src | 0x80;
762 // Convert GB2312 to unicode.
763 mGB2312Decoder->Convert((const char *)gb, &gbLen,
764 &uni, &uniLen);
765 *dest++ = uni;
766 ++mRunLength;
769 mState = mState_GB2312_1980;
770 if(dest >= destEnd)
771 goto error1;
773 break;
775 case mState_JISX0208_1983_2ndbyte:
777 PRUint8 off = sbIdx[*src];
778 if(0xFF == off) {
779 goto error2;
780 } else {
781 *dest++ = gJapaneseMap[mData+off];
782 ++mRunLength;
784 mState = mState_JISX0208_1983;
785 if(dest >= destEnd)
786 goto error1;
788 break;
790 case mState_KSC5601_1987_2ndbyte:
792 PRUint8 off = sbIdx[*src];
793 if(0xFF == off) {
794 goto error2;
795 } else {
796 if (!mEUCKRDecoder) {
797 // creating a delegate converter (EUC-KR)
798 nsresult rv;
799 nsCOMPtr<nsICharsetConverterManager> ccm =
800 do_GetService(kCharsetConverterManagerCID, &rv);
801 if (NS_SUCCEEDED(rv)) {
802 rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
805 if (!mEUCKRDecoder) {// failed creating a delegate converter
806 goto error2;
807 } else {
808 unsigned char ksc[2];
809 PRUnichar uni;
810 PRInt32 kscLen = 2, uniLen = 1;
811 // ((mData/94)+0x21) is the original 1st byte.
812 // *src is the present 2nd byte.
813 // Put 2 bytes (one character) to ksc[] with EUC-KR encoding.
814 ksc[0] = ((mData / 94) + 0x21) | 0x80;
815 ksc[1] = *src | 0x80;
816 // Convert EUC-KR to unicode.
817 mEUCKRDecoder->Convert((const char *)ksc, &kscLen,
818 &uni, &uniLen);
819 *dest++ = uni;
820 ++mRunLength;
823 mState = mState_KSC5601_1987;
824 if(dest >= destEnd)
825 goto error1;
827 break;
829 case mState_JISX0212_1990_2ndbyte:
831 PRUint8 off = sbIdx[*src];
832 if(0xFF == off) {
833 goto error2;
834 } else {
835 *dest++ = gJapaneseMap[mData+off];
836 ++mRunLength;
838 mState = mState_JISX0212_1990;
839 if(dest >= destEnd)
840 goto error1;
842 break;
844 case mState_ESC_2e: // ESC .
845 // "ESC ." will designate 96 character set to G2.
846 mState = mLastLegalState;
847 if( 'A' == *src) {
848 G2charset = G2_ISO88591;
849 } else if ('F' == *src) {
850 G2charset = G2_ISO88597;
851 } else {
852 if((dest+3) >= destEnd)
853 goto error1;
854 *dest++ = (PRUnichar) 0x1b;
855 *dest++ = (PRUnichar) '.';
856 if(0x80 & *src)
857 goto error2;
858 *dest++ = (PRUnichar) *src;
860 break;
862 case mState_ESC_4e: // ESC N
863 // "ESC N" is the SS2 sequence, that invoke a G2 designated
864 // character set. Since SS2 is effective only for next one
865 // character, mState should be returned to the last status.
866 mState = mLastLegalState;
867 if((0x20 <= *src) && (*src <= 0x7F)) {
868 if (G2_ISO88591 == G2charset) {
869 *dest++ = *src | 0x80;
870 ++mRunLength;
871 } else if (G2_ISO88597 == G2charset) {
872 if (!mISO88597Decoder) {
873 // creating a delegate converter (ISO-8859-7)
874 nsresult rv;
875 nsCOMPtr<nsICharsetConverterManager> ccm =
876 do_GetService(kCharsetConverterManagerCID, &rv);
877 if (NS_SUCCEEDED(rv)) {
878 rv = ccm->GetUnicodeDecoderRaw("ISO-8859-7", &mISO88597Decoder);
881 if (!mISO88597Decoder) {// failed creating a delegate converter
882 goto error2;
883 } else {
884 // Put one character with ISO-8859-7 encoding.
885 unsigned char gr = *src | 0x80;
886 PRUnichar uni;
887 PRInt32 grLen = 1, uniLen = 1;
888 // Convert ISO-8859-7 to unicode.
889 mISO88597Decoder->Convert((const char *)&gr, &grLen,
890 &uni, &uniLen);
891 *dest++ = uni;
892 ++mRunLength;
894 } else {// G2charset is G2_unknown (not designated yet)
895 goto error2;
897 if(dest >= destEnd)
898 goto error1;
899 } else {
900 if((dest+3) >= destEnd)
901 goto error1;
902 *dest++ = (PRUnichar) 0x1b;
903 *dest++ = (PRUnichar) 'N';
904 if(0x80 & *src)
905 goto error2;
906 *dest++ = (PRUnichar) *src;
908 break;
910 case mState_ERROR:
911 mState = mLastLegalState;
912 mRunLength = 0;
913 goto error2;
914 break;
916 } // switch
917 src++;
919 *aDestLen = dest - aDest;
920 return NS_OK;
921 error1:
922 *aDestLen = dest-aDest;
923 src++;
924 if ((mState == 0) && (src == srcEnd)) {
925 return NS_OK;
927 *aSrcLen = src - (const unsigned char*)aSrc;
928 return NS_OK_UDEC_MOREOUTPUT;
929 error2:
930 *aSrcLen = src - (const unsigned char*)aSrc;
931 *aDestLen = dest-aDest;
932 return NS_ERROR_UNEXPECTED;