1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
38 * A character set converter from Unicode to GBK.
41 * @created 08/Sept/1999
42 * @author Yueheng Xu, Yueheng.Xu@intel.com
44 * 04/Oct/1999. Yueheng Xu: used table gUnicodeToGBKTable[0x5200] to make
45 * Unicode to GB mapping fast
48 #include "nsUnicodeToGBK.h"
49 #include "nsICharRepresentable.h"
50 #include "nsUCvCnDll.h"
52 #include "uconvutil.h"
53 #include "nsUnicharUtils.h"
55 //-------------------------------------------------------------
56 // Global table initialization function defined in gbku.h
57 //-------------------------------------------------------------
59 //-----------------------------------------------------------------------
60 // Private class used by nsUnicodeToGB18030 and nsUnicodeToGB18030Font0
61 // nsUnicodeToGB18030Uniq2Bytes
62 //-----------------------------------------------------------------------
63 static const PRUint16 g_uf_gb18030_2bytes
[] = {
64 #include "gb18030uniq2b.uf"
66 class nsUnicodeToGB18030Uniq2Bytes
: public nsTableEncoderSupport
69 nsUnicodeToGB18030Uniq2Bytes()
70 : nsTableEncoderSupport(u2BytesCharset
,
71 (uMappingTable
*) &g_uf_gb18030_2bytes
, 2) {}
74 //-----------------------------------------------------------------------
75 // Private class used by nsUnicodeToGB18030
76 // nsUnicodeTo4BytesGB18030
77 //-----------------------------------------------------------------------
78 static const PRUint16 g_uf_gb18030_4bytes
[] = {
79 #include "gb180304bytes.uf"
81 class nsUnicodeTo4BytesGB18030
: public nsTableEncoderSupport
84 nsUnicodeTo4BytesGB18030()
85 : nsTableEncoderSupport(u4BytesGB18030Charset
,
86 (uMappingTable
*) &g_uf_gb18030_4bytes
, 4) {}
89 //-----------------------------------------------------------------------
90 // Private class used by nsUnicodeToGBK
91 // nsUnicodeToGBKUniq2Bytes
92 //-----------------------------------------------------------------------
93 static const PRUint16 g_uf_gbk_2bytes
[] = {
94 #include "gbkuniq2b.uf"
96 class nsUnicodeToGBKUniq2Bytes
: public nsTableEncoderSupport
99 nsUnicodeToGBKUniq2Bytes()
100 : nsTableEncoderSupport(u2BytesCharset
,
101 (uMappingTable
*) &g_uf_gbk_2bytes
, 2) {}
104 //-----------------------------------------------------------------------
105 // nsUnicodeToGB18030
106 //-----------------------------------------------------------------------
107 void nsUnicodeToGB18030::CreateExtensionEncoder()
109 mExtensionEncoder
= new nsUnicodeToGB18030Uniq2Bytes();
111 void nsUnicodeToGB18030::Create4BytesEncoder()
113 m4BytesEncoder
= new nsUnicodeTo4BytesGB18030();
116 PRBool
nsUnicodeToGB18030::EncodeSurrogate(
117 PRUnichar aSurrogateHigh
,
118 PRUnichar aSurrogateLow
,
121 if( NS_IS_HIGH_SURROGATE(aSurrogateHigh
) &&
122 NS_IS_LOW_SURROGATE(aSurrogateLow
) )
124 // notice that idx does not include the 0x10000
125 PRUint32 idx
= ((aSurrogateHigh
- (PRUnichar
)0xD800) << 10 ) |
126 (aSurrogateLow
- (PRUnichar
) 0xDC00);
128 unsigned char *out
= (unsigned char*) aOut
;
129 // notice this is from 0x90 for supplment planes
130 out
[0] = (idx
/ (10*126*10)) + 0x90;
132 out
[1] = (idx
/ (10*126)) + 0x30;
134 out
[2] = (idx
/ (10)) + 0x81;
135 out
[3] = (idx
% 10) + 0x30;
141 //----------------------------------------------------------------------
142 // Class nsUnicodeToGBK [implementation]
144 nsUnicodeToGBK::nsUnicodeToGBK(PRUint32 aMaxLength
) :
145 nsEncoderSupport(aMaxLength
)
147 mExtensionEncoder
= nsnull
;
148 m4BytesEncoder
= nsnull
;
149 mUtil
.InitToGBKTable();
152 void nsUnicodeToGBK::CreateExtensionEncoder()
154 mExtensionEncoder
= new nsUnicodeToGBKUniq2Bytes();
156 void nsUnicodeToGBK::Create4BytesEncoder()
158 m4BytesEncoder
= nsnull
;
160 PRBool
nsUnicodeToGBK::TryExtensionEncoder(
166 if( NS_IS_HIGH_SURROGATE(aChar
) ||
167 NS_IS_LOW_SURROGATE(aChar
) )
169 // performance tune for surrogate characters
172 if(! mExtensionEncoder
)
173 CreateExtensionEncoder();
174 if(mExtensionEncoder
)
177 nsresult res
= NS_OK
;
178 res
= mExtensionEncoder
->Convert(&aChar
, &len
, aOut
, aOutLen
);
179 if(NS_SUCCEEDED(res
) && (*aOutLen
> 0))
185 PRBool
nsUnicodeToGBK::Try4BytesEncoder(
191 if( NS_IS_HIGH_SURROGATE(aChar
) ||
192 NS_IS_LOW_SURROGATE(aChar
) )
194 // performance tune for surrogate characters
197 if(! m4BytesEncoder
)
198 Create4BytesEncoder();
202 nsresult res
= NS_OK
;
203 res
= m4BytesEncoder
->Convert(&aChar
, &len
, aOut
, aOutLen
);
204 NS_ASSERTION(NS_FAILED(res
) || ((1 == len
) && (4 == *aOutLen
)),
205 "unexpect conversion length");
206 if(NS_SUCCEEDED(res
) && (*aOutLen
> 0))
211 PRBool
nsUnicodeToGBK::EncodeSurrogate(
212 PRUnichar aSurrogateHigh
,
213 PRUnichar aSurrogateLow
,
216 return PR_FALSE
; // GBK cannot encode Surrogate, let the subclass encode it.
219 NS_IMETHODIMP
nsUnicodeToGBK::ConvertNoBuff(
220 const PRUnichar
* aSrc
,
221 PRInt32
* aSrcLength
,
223 PRInt32
* aDestLength
)
225 PRInt32 iSrcLength
= 0;
226 PRInt32 iDestLength
= 0;
228 nsresult res
= NS_OK
;
229 while (iSrcLength
< *aSrcLength
)
232 //if unicode's hi byte has something, it is not ASCII, must be a GB
233 if(IS_ASCII(unicode
))
236 *aDest
= CAST_UNICHAR_TO_CHAR(*aSrc
);
237 aDest
++; // increment 1 byte
241 if(mUtil
.UnicodeToGBKChar( unicode
, PR_FALSE
, &byte1
, &byte2
))
243 // make sure we still have 2 bytes for output first
244 if(iDestLength
+2 > *aDestLength
)
246 res
= NS_OK_UENC_MOREOUTPUT
;
251 aDest
+= 2; // increment 2 bytes
255 // make sure we still have 2 bytes for output first
256 if(iDestLength
+2 > *aDestLength
)
258 res
= NS_OK_UENC_MOREOUTPUT
;
261 // we cannot map in the common mapping. Let's try to
262 // call the delegated 2 byte converter for the gbk or gb18030
263 // unique 2 byte mapping
264 if(TryExtensionEncoder(unicode
, aDest
, &aOutLen
))
266 iDestLength
+= aOutLen
;
269 // make sure we still have 4 bytes for output first
270 if(iDestLength
+4 > *aDestLength
)
272 res
= NS_OK_UENC_MOREOUTPUT
;
275 // we still cannot map. Let's try to
276 // call the delegated GB18030 4 byte converter
278 if( NS_IS_HIGH_SURROGATE(unicode
) )
280 if((iSrcLength
+1) < *aSrcLength
) {
281 if(EncodeSurrogate(aSrc
[0],aSrc
[1], aDest
)) {
282 // since we got a surrogate pair, we need to increment src.
285 iDestLength
+= aOutLen
;
288 // only get a high surrogate, but not a low surrogate
289 res
= NS_ERROR_UENC_NOMAPPING
;
290 iSrcLength
++; // include length of the unmapped character
294 mSurrogateHigh
= aSrc
[0];
295 break; // this will go to afterwhileloop
298 if( NS_IS_LOW_SURROGATE(unicode
) )
300 if(NS_IS_HIGH_SURROGATE(mSurrogateHigh
)) {
301 if(EncodeSurrogate(mSurrogateHigh
, aSrc
[0], aDest
)) {
302 iDestLength
+= aOutLen
;
305 // only get a high surrogate, but not a low surrogate
306 res
= NS_ERROR_UENC_NOMAPPING
;
307 iSrcLength
++; // include length of the unmapped character
311 // only get a low surrogate, but not a low surrogate
312 res
= NS_ERROR_UENC_NOMAPPING
;
313 iSrcLength
++; // include length of the unmapped character
317 if(Try4BytesEncoder(unicode
, aDest
, &aOutLen
))
319 NS_ASSERTION((aOutLen
== 4), "we should always generate 4 bytes here");
320 iDestLength
+= aOutLen
;
323 res
= NS_ERROR_UENC_NOMAPPING
;
324 iSrcLength
++; // include length of the unmapped character
332 iSrcLength
++ ; // Each unicode char just count as one in PRUnichar string;
335 if ( iDestLength
>= (*aDestLength
) && (iSrcLength
< *aSrcLength
) )
337 res
= NS_OK_UENC_MOREOUTPUT
;
342 *aDestLength
= iDestLength
;
343 *aSrcLength
= iSrcLength
;
347 //----------------------------------------------------------------------
348 // Subclassing of nsTableEncoderSupport class [implementation]
350 NS_IMETHODIMP
nsUnicodeToGBK::FillInfo(PRUint32
*aInfo
)
352 mUtil
.FillInfo(aInfo
, 0x81, 0xFE, 0x40, 0xFE);
353 if(! mExtensionEncoder
)
354 CreateExtensionEncoder();
355 if(mExtensionEncoder
)
357 nsCOMPtr
<nsICharRepresentable
> aRep
= do_QueryInterface(mExtensionEncoder
);
358 aRep
->FillInfo(aInfo
);
361 if(! m4BytesEncoder
)
362 Create4BytesEncoder();
365 nsCOMPtr
<nsICharRepresentable
> aRep
= do_QueryInterface(m4BytesEncoder
);
366 aRep
->FillInfo(aInfo
);
369 //GBK font lib also have single byte ASCII characters, set them here
370 for (PRUint16 SrcUnicode
= 0x0000; SrcUnicode
<= 0x007F; SrcUnicode
++)
371 SET_REPRESENTABLE(aInfo
, SrcUnicode
);
372 SET_REPRESENTABLE(aInfo
, 0x20ac); // euro