intl/uconv/ucvja/nsJapaneseToUnicode.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* ***** BEGIN LICENSE BLOCK *****
   3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   4  *
   5  * The contents of this file are subject to the Mozilla Public License Version
   6  * 1.1 (the "License"); you may not use this file except in compliance with
   7  * the License. You may obtain a copy of the License at
   8  * http://www.mozilla.org/MPL/
   9  *
  10  * Software distributed under the License is distributed on an "AS IS" basis,
  11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12  * for the specific language governing rights and limitations under the
  13  * License.
  14  *
  15  * The Original Code is Mozilla Communicator client code.
  16  *
  17  * The Initial Developer of the Original Code is
  18  * Netscape Communications Corporation.
  19  * Portions created by the Initial Developer are Copyright (C) 1998
  20  * the Initial Developer. All Rights Reserved.
  21  *
  22  * Contributor(s):
  23  *
  24  * Alternatively, the contents of this file may be used under the terms of
  25  * either of the GNU General Public License Version 2 or later (the "GPL"),
  26  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  27  * in which case the provisions of the GPL or the LGPL are applicable instead
  28  * of those above. If you wish to allow use of your version of this file only
  29  * under the terms of either the GPL or the LGPL, and not to allow others to
  30  * use your version of this file under the terms of the MPL, indicate your
  31  * decision by deleting the provisions above and replace them with the notice
  32  * and other provisions required by the GPL or the LGPL. If you do not delete
  33  * the provisions above, a recipient may use your version of this file under
  34  * the terms of any one of the MPL, the GPL or the LGPL.
  35  *
  36  * ***** END LICENSE BLOCK ***** */
  37 #include "nsJapaneseToUnicode.h"
  38
  39 #include "nsUCSupport.h"
  40
  41 #include "nsIPrefBranch.h"
  42 #include "nsIPrefService.h"
  43
  44 #include "japanese.map"
  45
  46 #include "nsICharsetConverterManager.h"
  47 #include "nsIServiceManager.h"
  48 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
  49
  50 #define SJIS_INDEX mMapIndex[0]
  51 #define JIS0208_INDEX mMapIndex[1]
  52 #define JIS0212_INDEX gJIS0212Index
  53
  54 void nsJapaneseToUnicode::setMapMode()
  55 {
  56   nsresult res;
  57
  58   mMapIndex = gIndex;
  59
  60   nsCOMPtr<nsIPrefBranch> prefBranch = do_GetService(NS_PREFSERVICE_CONTRACTID);
  61   if (!prefBranch) return;
  62   nsXPIDLCString prefMap;
  63   res = prefBranch->GetCharPref("intl.jis0208.map", getter_Copies(prefMap));
  64   if (!NS_SUCCEEDED(res)) return;
  65   nsCaseInsensitiveCStringComparator comparator;
  66   if ( prefMap.Equals(NS_LITERAL_CSTRING("cp932"), comparator) ) {
  67     mMapIndex = gCP932Index;
  68   } else if ( prefMap.Equals(NS_LITERAL_CSTRING("ibm943"), comparator) ) {
  69     mMapIndex = gIBM943Index;
  70   }
  71 }
  72
  73 NS_IMETHODIMP nsShiftJISToUnicode::Convert(
  74    const char * aSrc, PRInt32 * aSrcLen,
  75      PRUnichar * aDest, PRInt32 * aDestLen)
  76 {
  77    static const PRUint8 sbIdx[256] =
  78    {
  79      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x00 */
  80      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x08 */
  81      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x10 */
  82      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x18 */
  83      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x20 */
  84      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x28 */
  85      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x30 */
  86      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x38 */
  87         0,    1,    2,    3,    4,    5,    6,    7,  /* 0x40 */
  88         8,    9,   10,   11,   12,   13,   14,   15,  /* 0x48 */
  89        16,   17,   18,   19,   20,   21,   22,   23,  /* 0x50 */
  90        24,   25,   26,   27,   28,   29,   30,   31,  /* 0x58 */
  91        32,   33,   34,   35,   36,   37,   38,   39,  /* 0x60 */
  92        40,   41,   42,   43,   44,   45,   46,   47,  /* 0x68 */
  93        48,   49,   50,   51,   52,   53,   54,   55,  /* 0x70 */
  94        56,   57,   58,   59,   60,   61,   62, 0xFF,  /* 0x78 */
  95        63,   64,   65,   66,   67,   68,   69,   70,  /* 0x80 */
  96        71,   72,   73,   74,   75,   76,   77,   78,  /* 0x88 */
  97        79,   80,   81,   82,   83,   84,   85,   86,  /* 0x90 */
  98        87,   88,   89,   90,   91,   92,   93,   94,  /* 0x98 */
  99        95,   96,   97,   98,   99,  100,  101,  102,  /* 0xa0 */
 100       103,  104,  105,  106,  107,  108,  109,  110,  /* 0xa8 */
 101       111,  112,  113,  114,  115,  116,  117,  118,  /* 0xb0 */
 102       119,  120,  121,  122,  123,  124,  125,  126,  /* 0xb8 */
 103       127,  128,  129,  130,  131,  132,  133,  134,  /* 0xc0 */
 104       135,  136,  137,  138,  139,  140,  141,  142,  /* 0xc8 */
 105       143,  144,  145,  146,  147,  148,  149,  150,  /* 0xd0 */
 106       151,  152,  153,  154,  155,  156,  157,  158,  /* 0xd8 */
 107       159,  160,  161,  162,  163,  164,  165,  166,  /* 0xe0 */
 108       167,  168,  169,  170,  171,  172,  173,  174,  /* 0xe8 */
 109       175,  176,  177,  178,  179,  180,  181,  182,  /* 0xf0 */
 110       183,  184,  185,  186,  187, 0xFF, 0xFF, 0xFF,  /* 0xf8 */
 111    };
 112
 113    const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
 114    const unsigned char* src =(unsigned char*) aSrc;
 115    PRUnichar* destEnd = aDest + *aDestLen;
 116    PRUnichar* dest = aDest;
 117    while((src < srcEnd))
 118    {
 119        switch(mState)
 120        {
 121
 122           case 0:
 123           if(*src & 0x80)
 124           {
 125             mData = SJIS_INDEX[*src & 0x7F];
 126             if(mData < 0xE000 )
 127             {
 128                mState = 1; // two bytes
 129             } else {
 130                if( mData > 0xFF00)
 131                {
 132                  if(0xFFFD == mData) {
 133                    // IE-compatible handling of undefined codepoints:
 134                    // 0x80 --> U+0080
 135                    // 0xa0 --> U+F8F0
 136                    // 0xfd --> U+F8F1
 137                    // 0xfe --> U+F8F2
 138                    // 0xff --> U+F8F3
 139                    switch (*src) {
 140                      case 0x80:
 141                        *dest++ = (PRUnichar) *src;
 142                        break;
 143
 144                      case 0xa0:
 145                        *dest++ = (PRUnichar) 0xf8f0;
 146                        break;
 147
 148                      case 0xfd:
 149                      case 0xfe:
 150                      case 0xff:
 151                        *dest++ = (PRUnichar) 0xf8f1 +
 152                                    (*src - (unsigned char)(0xfd));
 153                        break;
 154
 155                      default:
 156                        *dest++ = 0x30FB;
 157                    }
 158                    if(dest >= destEnd)
 159                      goto error1;
 160                  } else {
 161                    *dest++ = mData; // JIS 0201
 162                    if(dest >= destEnd)
 163                      goto error1;
 164                  }
 165                } else {
 166                  mState = 2; // EUDC
 167                }
 168             }
 169           } else {
 170             // ASCII
 171             *dest++ = (PRUnichar) *src;
 172             if(dest >= destEnd)
 173               goto error1;
 174           }
 175           break;
 176
 177           case 1: // Index to table
 178           {
 179             PRUint8 off = sbIdx[*src];
 180             if(0xFF == off) {
 181                *dest++ = 0x30FB;
 182             } else {
 183                PRUnichar ch = gJapaneseMap[mData+off];
 184                if(ch == 0xfffd)
 185                  ch = 0x30fb;
 186                *dest++ = ch;
 187             }
 188             mState = 0;
 189             if(dest >= destEnd)
 190               goto error1;
 191           }
 192           break;
 193
 194           case 2: // EUDC
 195           {
 196             PRUint8 off = sbIdx[*src];
 197             if(0xFF == off) {
 198                *dest++ = 0x30fb;
 199             } else {
 200                *dest++ = mData + off;
 201             }
 202             mState = 0;
 203             if(dest >= destEnd)
 204               goto error1;
 205           }
 206           break;
 207
 208        }
 209        src++;
 210    }
 211    *aDestLen = dest - aDest;
 212    return NS_OK;
 213 error1:
 214    *aDestLen = dest-aDest;
 215    src++;
 216    if ((mState == 0) && (src == srcEnd)) {
 217      return NS_OK;
 218    }
 219    *aSrcLen = src - (const unsigned char*)aSrc;
 220    return NS_OK_UDEC_MOREOUTPUT;
 221 }
 222
 223
 224
 225
 226 NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
 227    const char * aSrc, PRInt32 * aSrcLen,
 228      PRUnichar * aDest, PRInt32 * aDestLen)
 229 {
 230    static const PRUint8 sbIdx[256] =
 231    {
 232 /* 0x0X */
 233      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 234      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 235 /* 0x1X */
 236      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 237      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 238 /* 0x2X */
 239      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 240      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 241 /* 0x3X */
 242      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 243      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 244 /* 0x4X */
 245      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 246      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 247 /* 0x5X */
 248      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 249      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 250 /* 0x6X */
 251      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 252      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 253 /* 0x7X */
 254      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 255      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 256 /* 0x8X */
 257      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 258      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 259 /* 0x9X */
 260      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 261      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 262 /* 0xAX */
 263      0xFF, 0,    1,    2,    3,    4,    5,    6,
 264      7,    8 ,   9,    10,   11,   12,   13,   14,
 265 /* 0xBX */
 266      15,   16,   17,   18,   19,   20,   21,   22,
 267      23,   24,   25,   26,   27,   28,   29,   30,
 268 /* 0xCX */
 269      31,   32,   33,   34,   35,   36,   37,   38,
 270      39,   40,   41,   42,   43,   44,   45,   46,
 271 /* 0xDX */
 272      47,   48,   49,   50,   51,   52,   53,   54,
 273      55,   56,   57,   58,   59,   60,   61,   62,
 274 /* 0xEX */
 275      63,   64,   65,   66,   67,   68,   69,   70,
 276      71,   72,   73,   74,   75,   76,   77,   78,
 277 /* 0xFX */
 278      79,   80,   81,   82,   83,   84,   85,   86,
 279      87,   88,   89,   90,   91,   92,   93,   0xFF,
 280    };
 281
 282    const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
 283    const unsigned char* src =(unsigned char*) aSrc;
 284    PRUnichar* destEnd = aDest + *aDestLen;
 285    PRUnichar* dest = aDest;
 286    while((src < srcEnd))
 287    {
 288        switch(mState)
 289        {
 290           case 0:
 291           if(*src & 0x80  && *src != (unsigned char)0xa0)
 292           {
 293             mData = JIS0208_INDEX[*src & 0x7F];
 294             if(mData != 0xFFFD )
 295             {
 296                mState = 1; // two byte JIS0208
 297             } else {
 298                if( 0x8e == *src) {
 299                  // JIS 0201
 300                  mState = 2; // JIS0201
 301                } else if(0x8f == *src) {
 302                  // JIS 0212
 303                  mState = 3; // JIS0212
 304                } else {
 305                  // others
 306                  *dest++ = 0xFFFD;
 307                  if(dest >= destEnd)
 308                    goto error1;
 309                }
 310             }
 311           } else {
 312             // ASCII
 313             *dest++ = (PRUnichar) *src;
 314             if(dest >= destEnd)
 315               goto error1;
 316           }
 317           break;
 318
 319           case 1: // Index to table
 320           {
 321             PRUint8 off = sbIdx[*src];
 322             if(0xFF == off) {
 323               *dest++ = 0xFFFD;
 324                // if the first byte is valid for EUC-JP but the second
 325                // is not while being a valid US-ASCII(i.e. < 0xc0), save it
 326                // instead of eating it up !
 327                if ( ! (*src & 0xc0)  )
 328                  *dest++ = (PRUnichar) *src;;
 329             } else {
 330                *dest++ = gJapaneseMap[mData+off];
 331             }
 332             mState = 0;
 333             if(dest >= destEnd)
 334               goto error1;
 335           }
 336           break;
 337
 338           case 2: // JIS 0201
 339           {
 340             if((0xA1 <= *src) && (*src <= 0xDF)) {
 341               *dest++ = (0xFF61-0x00A1) + *src;
 342             } else {
 343               *dest++ = 0xFFFD;
 344               // if 0x8e is not followed by a valid JIS X 0201 byte
 345               // but by a valid US-ASCII, save it instead of eating it up.
 346               if ( (PRUint8)*src < (PRUint8)0x7f )
 347                  *dest++ = (PRUnichar) *src;
 348             }
 349             mState = 0;
 350             if(dest >= destEnd)
 351               goto error1;
 352           }
 353           break;
 354
 355           case 3: // JIS 0212
 356           {
 357             if(*src & 0x80)
 358             {
 359               mData = JIS0212_INDEX[*src & 0x7F];
 360               if(mData != 0xFFFD )
 361               {
 362                  mState = 4;
 363               } else {
 364                  mState = 5; // error
 365               }
 366             } else {
 367               mState = 5; // error
 368             }
 369           }
 370           break;
 371           case 4:
 372           {
 373             PRUint8 off = sbIdx[*src];
 374             if(0xFF == off) {
 375                *dest++ = 0xFFFD;
 376             } else {
 377                *dest++ = gJapaneseMap[mData+off];
 378             }
 379             mState = 0;
 380             if(dest >= destEnd)
 381               goto error1;
 382           }
 383           break;
 384           case 5: // two bytes undefined
 385           {
 386             *dest++ = 0xFFFD;
 387             mState = 0;
 388             if(dest >= destEnd)
 389               goto error1;
 390           }
 391           break;
 392        }
 393        src++;
 394    }
 395    *aDestLen = dest - aDest;
 396    return NS_OK;
 397 error1:
 398    *aDestLen = dest-aDest;
 399    src++;
 400    if ((mState == 0) && (src == srcEnd)) {
 401      return NS_OK;
 402    }
 403    *aSrcLen = src - (const unsigned char*)aSrc;
 404    return NS_OK_UDEC_MOREOUTPUT;
 405 }
 406
 407
 408
 409 NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
 410    const char * aSrc, PRInt32 * aSrcLen,
 411      PRUnichar * aDest, PRInt32 * aDestLen)
 412 {
 413    static const PRUint16 fbIdx[128] =
 414    {
 415 /* 0x8X */
 416      0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
 417      0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
 418 /* 0x9X */
 419      0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
 420      0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
 421 /* 0xAX */
 422      0xFFFD, 0,      94,     94* 2,  94* 3,  94* 4,  94* 5,  94* 6,
 423      94* 7,  94* 8 , 94* 9,  94*10,  94*11,  94*12,  94*13,  94*14,
 424 /* 0xBX */
 425      94*15,  94*16,  94*17,  94*18,  94*19,  94*20,  94*21,  94*22,
 426      94*23,  94*24,  94*25,  94*26,  94*27,  94*28,  94*29,  94*30,
 427 /* 0xCX */
 428      94*31,  94*32,  94*33,  94*34,  94*35,  94*36,  94*37,  94*38,
 429      94*39,  94*40,  94*41,  94*42,  94*43,  94*44,  94*45,  94*46,
 430 /* 0xDX */
 431      94*47,  94*48,  94*49,  94*50,  94*51,  94*52,  94*53,  94*54,
 432      94*55,  94*56,  94*57,  94*58,  94*59,  94*60,  94*61,  94*62,
 433 /* 0xEX */
 434      94*63,  94*64,  94*65,  94*66,  94*67,  94*68,  94*69,  94*70,
 435      94*71,  94*72,  94*73,  94*74,  94*75,  94*76,  94*77,  94*78,
 436 /* 0xFX */
 437      94*79,  94*80,  94*81,  94*82,  94*83,  94*84,  94*85,  94*86,
 438      94*87,  94*88,  94*89,  94*90,  94*91,  94*92,  94*93,  0xFFFD,
 439    };
 440    static const PRUint8 sbIdx[256] =
 441    {
 442 /* 0x0X */
 443      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 444      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 445 /* 0x1X */
 446      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 447      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 448 /* 0x2X */
 449      0xFF, 0,    1,    2,    3,    4,    5,    6,
 450      7,    8 ,   9,    10,   11,   12,   13,   14,
 451 /* 0x3X */
 452      15,   16,   17,   18,   19,   20,   21,   22,
 453      23,   24,   25,   26,   27,   28,   29,   30,
 454 /* 0x4X */
 455      31,   32,   33,   34,   35,   36,   37,   38,
 456      39,   40,   41,   42,   43,   44,   45,   46,
 457 /* 0x5X */
 458      47,   48,   49,   50,   51,   52,   53,   54,
 459      55,   56,   57,   58,   59,   60,   61,   62,
 460 /* 0x6X */
 461      63,   64,   65,   66,   67,   68,   69,   70,
 462      71,   72,   73,   74,   75,   76,   77,   78,
 463 /* 0x7X */
 464      79,   80,   81,   82,   83,   84,   85,   86,
 465      87,   88,   89,   90,   91,   92,   93,   0xFF,
 466 /* 0x8X */
 467      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 468      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 469 /* 0x9X */
 470      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 471      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 472 /* 0xAX */
 473      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 474      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 475 /* 0xBX */
 476      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 477      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 478 /* 0xCX */
 479      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 480      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 481 /* 0xDX */
 482      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 483      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 484 /* 0xEX */
 485      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 486      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 487 /* 0xFX */
 488      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 489      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 490    };
 491
 492    const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
 493    const unsigned char* src =(unsigned char*) aSrc;
 494    PRUnichar* destEnd = aDest + *aDestLen;
 495    PRUnichar* dest = aDest;
 496    while((src < srcEnd))
 497    {
 498
 499        switch(mState)
 500        {
 501           case mState_ASCII:
 502             if(0x1b == *src)
 503             {
 504               mLastLegalState = mState;
 505               mState = mState_ESC;
 506             } else if(*src & 0x80) {
 507               goto error2;
 508             } else {
 509               *dest++ = (PRUnichar) *src;
 510               if(dest >= destEnd)
 511                 goto error1;
 512             }
 513           break;
 514
 515           case mState_ESC:
 516             if( '(' == *src) {
 517               mState = mState_ESC_28;
 518             } else if ('$' == *src)  {
 519               mState = mState_ESC_24;
 520             } else if ('.' == *src)  { // for ISO-2022-JP-2
 521               mState = mState_ESC_2e;
 522             } else if ('N' == *src)  { // for ISO-2022-JP-2
 523               mState = mState_ESC_4e;
 524             } else  {
 525               if((dest+2) >= destEnd)
 526                 goto error1;
 527               *dest++ = (PRUnichar) 0x1b;
 528               if(0x80 & *src)
 529                 goto error2;
 530               *dest++ = (PRUnichar) *src;
 531               mState = mLastLegalState;
 532             }
 533           break;
 534
 535           case mState_ESC_28: // ESC (
 536             if( 'B' == *src) {
 537               mState = mState_ASCII;
 538               if (mRunLength == 0) {
 539                 if((dest+1) >= destEnd)
 540                   goto error1;
 541                 *dest++ = 0xFFFD;
 542               }
 543               mRunLength = 0;
 544             } else if ('J' == *src)  {
 545               mState = mState_JISX0201_1976Roman;
 546               if (mRunLength == 0 && mLastLegalState != mState_ASCII) {
 547                 if((dest+1) >= destEnd)
 548                   goto error1;
 549                 *dest++ = 0xFFFD;
 550               }
 551               mRunLength = 0;
 552             } else if ('I' == *src)  {
 553               mState = mState_JISX0201_1976Kana;
 554               mRunLength = 0;
 555             } else  {
 556               if((dest+3) >= destEnd)
 557                 goto error1;
 558               *dest++ = (PRUnichar) 0x1b;
 559               *dest++ = (PRUnichar) '(';
 560               if(0x80 & *src)
 561                 goto error2;
 562               *dest++ = (PRUnichar) *src;
 563               mState = mLastLegalState;
 564             }
 565           break;
 566
 567           case mState_ESC_24: // ESC $
 568             if( '@' == *src) {
 569               mState = mState_JISX0208_1978;
 570               mRunLength = 0;
 571             } else if ('A' == *src)  {
 572               mState = mState_GB2312_1980;
 573               mRunLength = 0;
 574             } else if ('B' == *src)  {
 575               mState = mState_JISX0208_1983;
 576               mRunLength = 0;
 577             } else if ('(' == *src)  {
 578               mState = mState_ESC_24_28;
 579             } else  {
 580               if((dest+3) >= destEnd)
 581                 goto error1;
 582               *dest++ = (PRUnichar) 0x1b;
 583               *dest++ = (PRUnichar) '$';
 584               if(0x80 & *src)
 585                 goto error2;
 586               *dest++ = (PRUnichar) *src;
 587               mState = mLastLegalState;
 588             }
 589           break;
 590
 591           case mState_ESC_24_28: // ESC $ (
 592             if( 'C' == *src) {
 593               mState = mState_KSC5601_1987;
 594               mRunLength = 0;
 595             } else if ('D' == *src) {
 596               mState = mState_JISX0212_1990;
 597               mRunLength = 0;
 598             } else  {
 599               if((dest+4) >= destEnd)
 600                 goto error1;
 601               *dest++ = (PRUnichar) 0x1b;
 602               *dest++ = (PRUnichar) '$';
 603               *dest++ = (PRUnichar) '(';
 604               if(0x80 & *src)
 605                 goto error2;
 606               *dest++ = (PRUnichar) *src;
 607               mState = mLastLegalState;
 608             }
 609           break;
 610
 611           case mState_JISX0201_1976Roman:
 612             if(0x1b == *src) {
 613               mLastLegalState = mState;
 614               mState = mState_ESC;
 615             } else if(*src & 0x80) {
 616               goto error2;
 617             } else {
 618               // XXX We need to  decide how to handle \ and ~ here
 619               // we may need a if statement here for '\' and '~'
 620               // to map them to Yen and Overbar
 621               *dest++ = (PRUnichar) *src;
 622               ++mRunLength;
 623               if(dest >= destEnd)
 624                 goto error1;
 625             }
 626           break;
 627
 628           case mState_JISX0201_1976Kana:
 629             if(0x1b == *src) {
 630               mLastLegalState = mState;
 631               mState = mState_ESC;
 632             } else {
 633               if((0x21 <= *src) && (*src <= 0x5F)) {
 634                 *dest++ = (0xFF61-0x0021) + *src;
 635                 ++mRunLength;
 636               } else {
 637                 goto error2;
 638               }
 639               if(dest >= destEnd)
 640                 goto error1;
 641             }
 642           break;
 643
 644           case mState_JISX0208_1978:
 645             if(0x1b == *src) {
 646               mLastLegalState = mState;
 647               mState = mState_ESC;
 648             } else if(*src & 0x80) {
 649               mLastLegalState = mState;
 650               mState = mState_ERROR;
 651             } else {
 652               mData = JIS0208_INDEX[*src & 0x7F];
 653               if(0xFFFD == mData)
 654                 goto error2;
 655               mState = mState_JISX0208_1978_2ndbyte;
 656             }
 657           break;
 658
 659           case mState_GB2312_1980:
 660             if(0x1b == *src) {
 661               mLastLegalState = mState;
 662               mState = mState_ESC;
 663             } else if(*src & 0x80) {
 664               mLastLegalState = mState;
 665               mState = mState_ERROR;
 666             } else {
 667               mData = fbIdx[*src & 0x7F];
 668               if(0xFFFD == mData)
 669                 goto error2;
 670               mState = mState_GB2312_1980_2ndbyte;
 671             }
 672           break;
 673
 674           case mState_JISX0208_1983:
 675             if(0x1b == *src) {
 676               mLastLegalState = mState;
 677               mState = mState_ESC;
 678             } else if(*src & 0x80) {
 679               mLastLegalState = mState;
 680               mState = mState_ERROR;
 681             } else {
 682               mData = JIS0208_INDEX[*src & 0x7F];
 683               if(0xFFFD == mData)
 684                 goto error2;
 685               mState = mState_JISX0208_1983_2ndbyte;
 686             }
 687           break;
 688
 689           case mState_KSC5601_1987:
 690             if(0x1b == *src) {
 691               mLastLegalState = mState;
 692               mState = mState_ESC;
 693             } else if(*src & 0x80) {
 694               mLastLegalState = mState;
 695               mState = mState_ERROR;
 696             } else {
 697               mData = fbIdx[*src & 0x7F];
 698               if(0xFFFD == mData)
 699                 goto error2;
 700               mState = mState_KSC5601_1987_2ndbyte;
 701             }
 702           break;
 703
 704           case mState_JISX0212_1990:
 705             if(0x1b == *src) {
 706               mLastLegalState = mState;
 707               mState = mState_ESC;
 708             } else if(*src & 0x80) {
 709               mLastLegalState = mState;
 710               mState = mState_ERROR;
 711             } else {
 712               mData = JIS0212_INDEX[*src & 0x7F];
 713               if(0xFFFD == mData)
 714                 goto error2;
 715               mState = mState_JISX0212_1990_2ndbyte;
 716             }
 717           break;
 718
 719           case mState_JISX0208_1978_2ndbyte:
 720           {
 721             PRUint8 off = sbIdx[*src];
 722             if(0xFF == off) {
 723                goto error2;
 724             } else {
 725                // XXX We need to map from JIS X 0208 1983 to 1987
 726                // in the next line before pass to *dest++
 727                *dest++ = gJapaneseMap[mData+off];
 728                ++mRunLength;
 729             }
 730             mState = mState_JISX0208_1978;
 731             if(dest >= destEnd)
 732               goto error1;
 733           }
 734           break;
 735
 736           case mState_GB2312_1980_2ndbyte:
 737           {
 738             PRUint8 off = sbIdx[*src];
 739             if(0xFF == off) {
 740                goto error2;
 741             } else {
 742               if (!mGB2312Decoder) {
 743                 // creating a delegate converter (GB2312)
 744                 nsresult rv;
 745                 nsCOMPtr<nsICharsetConverterManager> ccm =
 746                          do_GetService(kCharsetConverterManagerCID, &rv);
 747                 if (NS_SUCCEEDED(rv)) {
 748                   rv = ccm->GetUnicodeDecoderRaw("GB2312", &mGB2312Decoder);
 749                 }
 750               }
 751               if (!mGB2312Decoder) {// failed creating a delegate converter
 752                 goto error2;
 753               } else {
 754                 unsigned char gb[2];
 755                 PRUnichar uni;
 756                 PRInt32 gbLen = 2, uniLen = 1;
 757                 // ((mData/94)+0x21) is the original 1st byte.
 758                 // *src is the present 2nd byte.
 759                 // Put 2 bytes (one character) to gb[] with GB2312 encoding.
 760                 gb[0] = ((mData / 94) + 0x21) | 0x80;
 761                 gb[1] = *src | 0x80;
 762                 // Convert GB2312 to unicode.
 763                 mGB2312Decoder->Convert((const char *)gb, &gbLen,
 764                                         &uni, &uniLen);
 765                 *dest++ = uni;
 766                 ++mRunLength;
 767               }
 768             }
 769             mState = mState_GB2312_1980;
 770             if(dest >= destEnd)
 771               goto error1;
 772           }
 773           break;
 774
 775           case mState_JISX0208_1983_2ndbyte:
 776           {
 777             PRUint8 off = sbIdx[*src];
 778             if(0xFF == off) {
 779                goto error2;
 780             } else {
 781                *dest++ = gJapaneseMap[mData+off];
 782                ++mRunLength;
 783             }
 784             mState = mState_JISX0208_1983;
 785             if(dest >= destEnd)
 786               goto error1;
 787           }
 788           break;
 789
 790           case mState_KSC5601_1987_2ndbyte:
 791           {
 792             PRUint8 off = sbIdx[*src];
 793             if(0xFF == off) {
 794                goto error2;
 795             } else {
 796               if (!mEUCKRDecoder) {
 797                 // creating a delegate converter (EUC-KR)
 798                 nsresult rv;
 799                 nsCOMPtr<nsICharsetConverterManager> ccm =
 800                          do_GetService(kCharsetConverterManagerCID, &rv);
 801                 if (NS_SUCCEEDED(rv)) {
 802                   rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
 803                 }
 804               }
 805               if (!mEUCKRDecoder) {// failed creating a delegate converter
 806                 goto error2;
 807               } else {
 808                 unsigned char ksc[2];
 809                 PRUnichar uni;
 810                 PRInt32 kscLen = 2, uniLen = 1;
 811                 // ((mData/94)+0x21) is the original 1st byte.
 812                 // *src is the present 2nd byte.
 813                 // Put 2 bytes (one character) to ksc[] with EUC-KR encoding.
 814                 ksc[0] = ((mData / 94) + 0x21) | 0x80;
 815                 ksc[1] = *src | 0x80;
 816                 // Convert EUC-KR to unicode.
 817                 mEUCKRDecoder->Convert((const char *)ksc, &kscLen,
 818                                        &uni, &uniLen);
 819                 *dest++ = uni;
 820                 ++mRunLength;
 821               }
 822             }
 823             mState = mState_KSC5601_1987;
 824             if(dest >= destEnd)
 825               goto error1;
 826           }
 827           break;
 828
 829           case mState_JISX0212_1990_2ndbyte:
 830           {
 831             PRUint8 off = sbIdx[*src];
 832             if(0xFF == off) {
 833                goto error2;
 834             } else {
 835                *dest++ = gJapaneseMap[mData+off];
 836                ++mRunLength;
 837             }
 838             mState = mState_JISX0212_1990;
 839             if(dest >= destEnd)
 840               goto error1;
 841           }
 842           break;
 843
 844           case mState_ESC_2e: // ESC .
 845             // "ESC ." designates a 96-character set to G2.
 846             mState = mLastLegalState;
 847             if( 'A' == *src) {
 848               G2charset = G2_ISO88591;
 849             } else if ('F' == *src) {
 850               G2charset = G2_ISO88597;
 851             } else  {
 852               if((dest+3) >= destEnd)
 853                 goto error1;
 854               *dest++ = (PRUnichar) 0x1b;
 855               *dest++ = (PRUnichar) '.';
 856               if(0x80 & *src)
 857                 goto error2;
 858               *dest++ = (PRUnichar) *src;
 859             }
 860           break;
 861
 862           case mState_ESC_4e: // ESC N
 863             // "ESC N" is the SS2 sequence, which invokes a G2-designated
 864             // character set.  Since SS2 is effective only for the next
 865             // character, mState is restored to the last legal state.
 866             mState = mLastLegalState;
 867             if((0x20 <= *src) && (*src <= 0x7F)) {
 868               if (G2_ISO88591 == G2charset) {
 869                 *dest++ = *src | 0x80;
 870                 ++mRunLength;
 871               } else if (G2_ISO88597 == G2charset) {
 872                 if (!mISO88597Decoder) {
 873                   // creating a delegate converter (ISO-8859-7)
 874                   nsresult rv;
 875                   nsCOMPtr<nsICharsetConverterManager> ccm =
 876                            do_GetService(kCharsetConverterManagerCID, &rv);
 877                   if (NS_SUCCEEDED(rv)) {
 878                     rv = ccm->GetUnicodeDecoderRaw("ISO-8859-7", &mISO88597Decoder);
 879                   }
 880                 }
 881                 if (!mISO88597Decoder) {// failed creating a delegate converter
 882                   goto error2;
 883                 } else {
 884                   // Put one character with ISO-8859-7 encoding.
 885                   unsigned char gr = *src | 0x80;
 886                   PRUnichar uni;
 887                   PRInt32 grLen = 1, uniLen = 1;
 888                   // Convert ISO-8859-7 to unicode.
 889                   mISO88597Decoder->Convert((const char *)&gr, &grLen,
 890                                             &uni, &uniLen);
 891                   *dest++ = uni;
 892                   ++mRunLength;
 893                 }
 894               } else {// G2charset is G2_unknown (not designated yet)
 895                 goto error2;
 896               }
 897               if(dest >= destEnd)
 898                 goto error1;
 899             } else {
 900               if((dest+3) >= destEnd)
 901                 goto error1;
 902               *dest++ = (PRUnichar) 0x1b;
 903               *dest++ = (PRUnichar) 'N';
 904               if(0x80 & *src)
 905                 goto error2;
 906               *dest++ = (PRUnichar) *src;
 907             }
 908           break;
 909
 910           case mState_ERROR:
 911              mState = mLastLegalState;
 912              mRunLength = 0;
 913              goto error2;
 914           break;
 915
 916        } // switch
 917        src++;
 918    }
 919    *aDestLen = dest - aDest;
 920    return NS_OK;
 921 error1:
 922    *aDestLen = dest-aDest;
 923    src++;
 924    if ((mState == 0) && (src == srcEnd)) {
 925      return NS_OK;
 926    }
 927    *aSrcLen = src - (const unsigned char*)aSrc;
 928    return NS_OK_UDEC_MOREOUTPUT;
 929 error2:
 930    *aSrcLen = src - (const unsigned char*)aSrc;
 931    *aDestLen = dest-aDest;
 932    return NS_ERROR_UNEXPECTED;
 933 }