1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is Mozilla Communicator client code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
38 #include "nsGBKConvUtil.h"
41 #include "nsICharRepresentable.h"
// Number of entries in gGBKToUnicodeTable. Per the derivation in the trailing
// comment this is 0x7e (126) lead-byte values times 0xbf (191) trail-byte
// values.
42 #define MAX_GBK_LENGTH 24066 /* (0xfe-0x80)*(0xfe-0x3f) */
43 //--------------------------------------------------------------------
45 //--------------------------------------------------------------------
// Flag tested on entry to InitToGBKTable() and set to PR_TRUE at its end, so
// the reverse table below is only built once.
47 static PRBool gInitToGBKTable
= PR_FALSE
;
// GBK -> Unicode table. GBKCharToUnicode() indexes it with
// (lead - 0x81) * 0xbf + (trail - 0x40).
// NOTE(review): the initializer body is not visible in this listing.
48 static const PRUnichar gGBKToUnicodeTable
[MAX_GBK_LENGTH
] = {
// Unicode -> GBK table with one slot per code unit in 0x4e00..0x9fff. Each
// entry packs the lead byte in the high 8 bits and the trail byte in the low
// 8 bits. It is zero-filled and then populated by InitToGBKTable().
51 static PRUint16 gUnicodeToGBKTable
[0xA000-0x4e00];
// Looks up the two-byte GBK code for the UTF-16 code unit aChar.
//
//   aChar      code unit to convert
//   aToGL      see the GL comments near the end of the function
//   aOutByte1  receives the lead byte
//   aOutByte2  receives the trail byte
//
// Both output bytes are cleared before any lookup is attempted.
// NOTE(review): this listing omits a number of lines (braces, return
// statements, the tests on `found` and `aToGL`), so the exact return value
// on each path cannot be read from here - presumably PR_TRUE on success and
// PR_FALSE otherwise; confirm against the full file.
53 PRBool
nsGBKConvUtil::UnicodeToGBKChar(
54 PRUnichar aChar
, PRBool aToGL
, char*
55 aOutByte1
, char* aOutByte2
)
// The flag checked here is set by InitToGBKTable() after it fills
// gUnicodeToGBKTable; note the message text names gGBKToUnicodeTable instead.
57 NS_ASSERTION(gInitToGBKTable
, "gGBKToUnicodeTable is not init yet. need to call InitToGBKTable first");
58 PRBool found
=PR_FALSE
;
59 *aOutByte1
= *aOutByte2
= 0;
60 if(UNICHAR_IN_RANGE(0xd800, aChar
, 0xdfff))
62 // surrogate is not in here
// Code units in 0x4e00..0x9fff use the direct table, indexed relative to
// 0x4e00; the packed entry is split into high byte and low byte.
65 if(UNICHAR_IN_RANGE(0x4e00, aChar
, 0x9FFF))
67 PRUint16 item
= gUnicodeToGBKTable
[aChar
- 0x4e00];
70 *aOutByte1
= item
>> 8;
71 *aOutByte2
= item
& 0x00FF;
// Linear scan of the forward table; a matching index i is turned back into
// bytes with the inverse of the (lead - 0x81) * 0xbf + (trail - 0x40) formula.
// NOTE(review): presumably this is the fallback for code units outside
// 0x4e00..0x9fff - the enclosing else is not visible here.
78 for( PRInt32 i
= 0; i
< MAX_GBK_LENGTH
; i
++ )
80 if( aChar
== gGBKToUnicodeTable
[i
])
82 *aOutByte1
= (i
/ 0x00BF + 0x0081) ;
83 *aOutByte2
= (i
% 0x00BF + 0x0040) ;
93 // to GL, we only return if it is in the range
94 if(UINT8_IN_RANGE(0xA1, *aOutByte1
, 0xFE) &&
95 UINT8_IN_RANGE(0xA1, *aOutByte2
, 0xFE))
101 // if it does not fit into 0xa1-0xfe 0xa1-0xfe range that mean
102 // it is not a GB2312 character, we cannot map to GL
// Returns the Unicode value stored in gGBKToUnicodeTable for the GBK byte
// pair (aByte1, aByte2), or UCS2_NO_MAPPING when the computed index is not
// below MAX_GBK_LENGTH.
//
//   aByte1  lead byte, asserted to be in 0x81..0xFE
//   aByte2  trail byte, asserted to be in 0x40..0xFE
110 PRUnichar
nsGBKConvUtil::GBKCharToUnicode(char aByte1
, char aByte2
)
112 NS_ASSERTION(UINT8_IN_RANGE(0x81,aByte1
, 0xFE), "first byte out of range");
113 NS_ASSERTION(UINT8_IN_RANGE(0x40,aByte2
, 0xFE), "second byte out of range");
// Go through PRUint8 so the index arithmetic below works on 0..255 values
// even where plain char is signed.
115 PRUint8 i1
= (PRUint8
)aByte1
;
116 PRUint8 i2
= (PRUint8
)aByte2
;
// 0xbf table slots per lead byte, trail bytes counted from 0x40.
117 PRUint16 idx
= (i1
- 0x0081) * 0x00bf + i2
- 0x0040 ;
119 NS_ASSERTION(idx
< MAX_GBK_LENGTH
, "ARB");
120 // play it safe - add an if statement here to protect against ARB
121 // (presumably "array read out of bounds"); probably not necessary
122 if(idx
< MAX_GBK_LENGTH
)
123 return gGBKToUnicodeTable
[ idx
];
125 return UCS2_NO_MAPPING
;
// Builds gUnicodeToGBKTable from gGBKToUnicodeTable and then sets
// gInitToGBKTable. The flag is tested first.
// NOTE(review): the statement guarded by that test is not visible in this
// listing - presumably an early return so the table is built only once.
127 void nsGBKConvUtil::InitToGBKTable()
129 if ( gInitToGBKTable
)
134 // zap it to zero first
135 memset(gUnicodeToGBKTable
,0, sizeof(gUnicodeToGBKTable
));
// Walk every forward-table slot; i is the GBK table index.
137 for ( i
=0; i
<MAX_GBK_LENGTH
; i
++ )
139 unicode
= gGBKToUnicodeTable
[i
];
140 // to reduce size of gUnicodeToGBKTable, we only do direct unicode to GB
141 // table mapping between unicode 0x4E00 and 0xA000. Others by searching
142 // gGBKToUnicodeTable. There is a trade off between memory usage and speed.
143 if(UNICHAR_IN_RANGE(0x4e00, unicode
, 0x9fff))
// Store the lead byte (i / 0xBF + 0x81) in the high 8 bits and the trail
// byte (i % 0xBF + 0x40) in the low 8 bits.
// NOTE(review): the listing jumps from embedded line 143 to 146. The table
// has only 0xA000-0x4e00 slots and UnicodeToGBKChar reads it at
// aChar - 0x4e00, so `unicode` is presumably rebased by 0x4e00 on a line not
// shown here - confirm against the full file.
146 gUnicodeToGBKTable
[unicode
] = (( i
/ 0x00BF + 0x0081) << 8) |
147 ( i
% 0x00BF+ 0x0040);
150 gInitToGBKTable
= PR_TRUE
;
// Marks as representable in aInfo every Unicode value that
// gGBKToUnicodeTable maps from the byte pairs in the given rectangle.
//
//   aStart1, aEnd1  inclusive range of lead bytes
//   aStart2, aEnd2  inclusive range of trail bytes
//
// NOTE(review): the aInfo parameter and the declarations of i, j, k and
// unicode are not visible in this listing. No range check on k is visible
// either; callers presumably pass lead bytes >= 0x81 and trail bytes >= 0x40
// that stay inside the table - confirm.
152 void nsGBKConvUtil::FillInfo(
154 PRUint8 aStart1
, PRUint8 aEnd1
,
155 PRUint8 aStart2
, PRUint8 aEnd2
161 for ( i
=aStart1
; i
<=aEnd1
; i
++)
163 for( j
=aStart2
; j
<=aEnd2
; j
++)
// Same index formula as GBKCharToUnicode: 0xBF slots per lead byte.
165 k
= (i
- 0x0081)*0x00BF +(j
-0x0040);
166 unicode
= gGBKToUnicodeTable
[k
];
167 NS_ASSERTION(unicode
!= 0xFFFF, "somehow the table still use 0xffff");
// Slots holding UCS2_NO_MAPPING are skipped.
168 if (unicode
!= UCS2_NO_MAPPING
)
170 SET_REPRESENTABLE(aInfo
, unicode
);
// Calls FillInfo once per byte rectangle listed below.
// Each call passes aInfo, then the first and last lead byte, then the first
// and last trail byte (both ranges inclusive). Every byte value is OR-ed
// with 0x80. Going by the existing "row 1" / "row 3" comments, lead byte
// 0x20+N corresponds to row N; (0x20+n) in the trail position is presumably
// cell n within that row.
// NOTE(review): the parameter list and the end of the function are not
// visible in this listing.
175 void nsGBKConvUtil::FillGB2312Info(
179 // The following range is coded by looking at the GB2312 standard
180 // and make sure we do not call FillInfo for undefined code point
182 // row 1 - 1 range (full)
183 FillInfo(aInfo
, 0x21|0x80, 0x21|0x80, 0x21|0x80, 0x7E|0x80);
// lead byte 0x22: three separate trail-byte ranges
185 FillInfo(aInfo
, 0x22|0x80, 0x22|0x80, (0x20+17)|0x80, (0x20+66)|0x80);
186 FillInfo(aInfo
, 0x22|0x80, 0x22|0x80, (0x20+69)|0x80, (0x20+78)|0x80);
187 FillInfo(aInfo
, 0x22|0x80, 0x22|0x80, (0x20+81)|0x80, (0x20+92)|0x80);
188 // row 3 - 1 range (full)
189 FillInfo(aInfo
, 0x23|0x80, 0x23|0x80, 0x21|0x80, 0x7E|0x80);
// lead byte 0x24: one range
191 FillInfo(aInfo
, 0x24|0x80, 0x24|0x80, (0x20+ 1)|0x80, (0x20+83)|0x80);
// lead byte 0x25: one range
193 FillInfo(aInfo
, 0x25|0x80, 0x25|0x80, (0x20+ 1)|0x80, (0x20+86)|0x80);
// lead byte 0x26: two ranges
195 FillInfo(aInfo
, 0x26|0x80, 0x26|0x80, (0x20+ 1)|0x80, (0x20+24)|0x80);
196 FillInfo(aInfo
, 0x26|0x80, 0x26|0x80, (0x20+33)|0x80, (0x20+56)|0x80);
// lead byte 0x27: two ranges
198 FillInfo(aInfo
, 0x27|0x80, 0x27|0x80, (0x20+ 1)|0x80, (0x20+33)|0x80);
199 FillInfo(aInfo
, 0x27|0x80, 0x27|0x80, (0x20+49)|0x80, (0x20+81)|0x80);
// lead byte 0x28: two ranges
201 FillInfo(aInfo
, 0x28|0x80, 0x28|0x80, (0x20+ 1)|0x80, (0x20+26)|0x80);
202 FillInfo(aInfo
, 0x28|0x80, 0x28|0x80, (0x20+36)|0x80, (0x20+73)|0x80);
// lead byte 0x29: one range
204 FillInfo(aInfo
, 0x29|0x80, 0x29|0x80, (0x20+ 4)|0x80, (0x20+79)|0x80);
206 // Frequent used Hanzi
// lead bytes 0x30..0x56 with the full 0x21..0x7E trail range
208 FillInfo(aInfo
, 0x30|0x80, 0x56|0x80, 0x21|0x80, 0x7E|0x80);
// lead byte 0x57 stops at trail byte 0x79 instead of 0x7E
210 FillInfo(aInfo
, 0x57|0x80, 0x57|0x80, 0x21|0x80, 0x79|0x80);
212 // Infrequent used Hanzi
// lead bytes 0x58..0x77 with the full 0x21..0x7E trail range
214 FillInfo(aInfo
, 0x58|0x80, 0x77|0x80, 0x21|0x80, 0x7E|0x80);