Bug 448909 - Need more controls WHATWG Video tag (followup patch). r=mconnor
[wine-gecko.git] / intl / uconv / ucvcn / nsUnicodeToGBK.cpp
blob58196f6b994f284be5f589309e2f5dcedd699c85
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
37 /**
38 * A character set converter from Unicode to GBK.
41 * @created 08/Sept/1999
42 * @author Yueheng Xu, Yueheng.Xu@intel.com
43 * Revision History
44 * 04/Oct/1999. Yueheng Xu: used table gUnicodeToGBKTable[0x5200] to make
45 * Unicode to GB mapping fast
48 #include "nsUnicodeToGBK.h"
49 #include "nsICharRepresentable.h"
50 #include "nsUCvCnDll.h"
51 #include "gbku.h"
52 #include "uconvutil.h"
53 #include "nsUnicharUtils.h"
55 //-------------------------------------------------------------
56 // Global table initialization function defined in gbku.h
57 //-------------------------------------------------------------
59 //-----------------------------------------------------------------------
60 // Private class used by nsUnicodeToGB18030 and nsUnicodeToGB18030Font0
61 // nsUnicodeToGB18030Uniq2Bytes
62 //-----------------------------------------------------------------------
63 static const PRUint16 g_uf_gb18030_2bytes[] = {
64 #include "gb18030uniq2b.uf"
66 class nsUnicodeToGB18030Uniq2Bytes : public nsTableEncoderSupport
68 public:
69 nsUnicodeToGB18030Uniq2Bytes()
70 : nsTableEncoderSupport(u2BytesCharset,
71 (uMappingTable*) &g_uf_gb18030_2bytes, 2) {}
72 protected:
74 //-----------------------------------------------------------------------
75 // Private class used by nsUnicodeToGB18030
76 // nsUnicodeTo4BytesGB18030
77 //-----------------------------------------------------------------------
78 static const PRUint16 g_uf_gb18030_4bytes[] = {
79 #include "gb180304bytes.uf"
81 class nsUnicodeTo4BytesGB18030 : public nsTableEncoderSupport
83 public:
84 nsUnicodeTo4BytesGB18030()
85 : nsTableEncoderSupport(u4BytesGB18030Charset,
86 (uMappingTable*) &g_uf_gb18030_4bytes, 4) {}
87 protected:
89 //-----------------------------------------------------------------------
90 // Private class used by nsUnicodeToGBK
91 // nsUnicodeToGBKUniq2Bytes
92 //-----------------------------------------------------------------------
93 static const PRUint16 g_uf_gbk_2bytes[] = {
94 #include "gbkuniq2b.uf"
96 class nsUnicodeToGBKUniq2Bytes : public nsTableEncoderSupport
98 public:
99 nsUnicodeToGBKUniq2Bytes()
100 : nsTableEncoderSupport(u2BytesCharset,
101 (uMappingTable*) &g_uf_gbk_2bytes, 2) {}
102 protected:
104 //-----------------------------------------------------------------------
105 // nsUnicodeToGB18030
106 //-----------------------------------------------------------------------
107 void nsUnicodeToGB18030::CreateExtensionEncoder()
109 mExtensionEncoder = new nsUnicodeToGB18030Uniq2Bytes();
111 void nsUnicodeToGB18030::Create4BytesEncoder()
113 m4BytesEncoder = new nsUnicodeTo4BytesGB18030();
116 PRBool nsUnicodeToGB18030::EncodeSurrogate(
117 PRUnichar aSurrogateHigh,
118 PRUnichar aSurrogateLow,
119 char* aOut)
121 if( NS_IS_HIGH_SURROGATE(aSurrogateHigh) &&
122 NS_IS_LOW_SURROGATE(aSurrogateLow) )
124 // notice that idx does not include the 0x10000
125 PRUint32 idx = ((aSurrogateHigh - (PRUnichar)0xD800) << 10 ) |
126 (aSurrogateLow - (PRUnichar) 0xDC00);
128 unsigned char *out = (unsigned char*) aOut;
129 // notice this is from 0x90 for supplment planes
130 out[0] = (idx / (10*126*10)) + 0x90;
131 idx %= (10*126*10);
132 out[1] = (idx / (10*126)) + 0x30;
133 idx %= (10*126);
134 out[2] = (idx / (10)) + 0x81;
135 out[3] = (idx % 10) + 0x30;
136 return PR_TRUE;
138 return PR_FALSE;
141 //----------------------------------------------------------------------
142 // Class nsUnicodeToGBK [implementation]
144 nsUnicodeToGBK::nsUnicodeToGBK(PRUint32 aMaxLength) :
145 nsEncoderSupport(aMaxLength)
147 mExtensionEncoder = nsnull;
148 m4BytesEncoder = nsnull;
149 mUtil.InitToGBKTable();
150 mSurrogateHigh = 0;
152 void nsUnicodeToGBK::CreateExtensionEncoder()
154 mExtensionEncoder = new nsUnicodeToGBKUniq2Bytes();
156 void nsUnicodeToGBK::Create4BytesEncoder()
158 m4BytesEncoder = nsnull;
160 PRBool nsUnicodeToGBK::TryExtensionEncoder(
161 PRUnichar aChar,
162 char* aOut,
163 PRInt32 *aOutLen
166 if( NS_IS_HIGH_SURROGATE(aChar) ||
167 NS_IS_LOW_SURROGATE(aChar) )
169 // performance tune for surrogate characters
170 return PR_FALSE;
172 if(! mExtensionEncoder )
173 CreateExtensionEncoder();
174 if(mExtensionEncoder)
176 PRInt32 len = 1;
177 nsresult res = NS_OK;
178 res = mExtensionEncoder->Convert(&aChar, &len, aOut, aOutLen);
179 if(NS_SUCCEEDED(res) && (*aOutLen > 0))
180 return PR_TRUE;
182 return PR_FALSE;
185 PRBool nsUnicodeToGBK::Try4BytesEncoder(
186 PRUnichar aChar,
187 char* aOut,
188 PRInt32 *aOutLen
191 if( NS_IS_HIGH_SURROGATE(aChar) ||
192 NS_IS_LOW_SURROGATE(aChar) )
194 // performance tune for surrogate characters
195 return PR_FALSE;
197 if(! m4BytesEncoder )
198 Create4BytesEncoder();
199 if(m4BytesEncoder)
201 PRInt32 len = 1;
202 nsresult res = NS_OK;
203 res = m4BytesEncoder->Convert(&aChar, &len, aOut, aOutLen);
204 NS_ASSERTION(NS_FAILED(res) || ((1 == len) && (4 == *aOutLen)),
205 "unexpect conversion length");
206 if(NS_SUCCEEDED(res) && (*aOutLen > 0))
207 return PR_TRUE;
209 return PR_FALSE;
211 PRBool nsUnicodeToGBK::EncodeSurrogate(
212 PRUnichar aSurrogateHigh,
213 PRUnichar aSurrogateLow,
214 char* aOut)
216 return PR_FALSE; // GBK cannot encode Surrogate, let the subclass encode it.
219 NS_IMETHODIMP nsUnicodeToGBK::ConvertNoBuff(
220 const PRUnichar * aSrc,
221 PRInt32 * aSrcLength,
222 char * aDest,
223 PRInt32 * aDestLength)
225 PRInt32 iSrcLength = 0;
226 PRInt32 iDestLength = 0;
227 PRUnichar unicode;
228 nsresult res = NS_OK;
229 while (iSrcLength < *aSrcLength )
231 unicode = *aSrc;
232 //if unicode's hi byte has something, it is not ASCII, must be a GB
233 if(IS_ASCII(unicode))
235 // this is an ASCII
236 *aDest = CAST_UNICHAR_TO_CHAR(*aSrc);
237 aDest++; // increment 1 byte
238 iDestLength +=1;
239 } else {
240 char byte1, byte2;
241 if(mUtil.UnicodeToGBKChar( unicode, PR_FALSE, &byte1, &byte2))
243 // make sure we still have 2 bytes for output first
244 if(iDestLength+2 > *aDestLength)
246 res = NS_OK_UENC_MOREOUTPUT;
247 break;
249 aDest[0] = byte1;
250 aDest[1] = byte2;
251 aDest += 2; // increment 2 bytes
252 iDestLength +=2;
253 } else {
254 PRInt32 aOutLen = 2;
255 // make sure we still have 2 bytes for output first
256 if(iDestLength+2 > *aDestLength)
258 res = NS_OK_UENC_MOREOUTPUT;
259 break;
261 // we cannot map in the common mapping. Let's try to
262 // call the delegated 2 byte converter for the gbk or gb18030
263 // unique 2 byte mapping
264 if(TryExtensionEncoder(unicode, aDest, &aOutLen))
266 iDestLength += aOutLen;
267 aDest += aOutLen;
268 } else {
269 // make sure we still have 4 bytes for output first
270 if(iDestLength+4 > *aDestLength)
272 res = NS_OK_UENC_MOREOUTPUT;
273 break;
275 // we still cannot map. Let's try to
276 // call the delegated GB18030 4 byte converter
277 aOutLen = 4;
278 if( NS_IS_HIGH_SURROGATE(unicode) )
280 if((iSrcLength+1) < *aSrcLength ) {
281 if(EncodeSurrogate(aSrc[0],aSrc[1], aDest)) {
282 // since we got a surrogate pair, we need to increment src.
283 iSrcLength++ ;
284 aSrc++;
285 iDestLength += aOutLen;
286 aDest += aOutLen;
287 } else {
288 // only get a high surrogate, but not a low surrogate
289 res = NS_ERROR_UENC_NOMAPPING;
290 iSrcLength++; // include length of the unmapped character
291 break;
293 } else {
294 mSurrogateHigh = aSrc[0];
295 break; // this will go to afterwhileloop
297 } else {
298 if( NS_IS_LOW_SURROGATE(unicode) )
300 if(NS_IS_HIGH_SURROGATE(mSurrogateHigh)) {
301 if(EncodeSurrogate(mSurrogateHigh, aSrc[0], aDest)) {
302 iDestLength += aOutLen;
303 aDest += aOutLen;
304 } else {
305 // only get a high surrogate, but not a low surrogate
306 res = NS_ERROR_UENC_NOMAPPING;
307 iSrcLength++; // include length of the unmapped character
308 break;
310 } else {
311 // only get a low surrogate, but not a low surrogate
312 res = NS_ERROR_UENC_NOMAPPING;
313 iSrcLength++; // include length of the unmapped character
314 break;
316 } else {
317 if(Try4BytesEncoder(unicode, aDest, &aOutLen))
319 NS_ASSERTION((aOutLen == 4), "we should always generate 4 bytes here");
320 iDestLength += aOutLen;
321 aDest += aOutLen;
322 } else {
323 res = NS_ERROR_UENC_NOMAPPING;
324 iSrcLength++; // include length of the unmapped character
325 break;
332 iSrcLength++ ; // Each unicode char just count as one in PRUnichar string;
333 mSurrogateHigh = 0;
334 aSrc++;
335 if ( iDestLength >= (*aDestLength) && (iSrcLength < *aSrcLength) )
337 res = NS_OK_UENC_MOREOUTPUT;
338 break;
341 //afterwhileloop:
342 *aDestLength = iDestLength;
343 *aSrcLength = iSrcLength;
344 return res;
347 //----------------------------------------------------------------------
348 // Subclassing of nsTableEncoderSupport class [implementation]
350 NS_IMETHODIMP nsUnicodeToGBK::FillInfo(PRUint32 *aInfo)
352 mUtil.FillInfo(aInfo, 0x81, 0xFE, 0x40, 0xFE);
353 if(! mExtensionEncoder )
354 CreateExtensionEncoder();
355 if(mExtensionEncoder)
357 nsCOMPtr<nsICharRepresentable> aRep = do_QueryInterface(mExtensionEncoder);
358 aRep->FillInfo(aInfo);
361 if(! m4BytesEncoder )
362 Create4BytesEncoder();
363 if(m4BytesEncoder)
365 nsCOMPtr<nsICharRepresentable> aRep = do_QueryInterface(m4BytesEncoder);
366 aRep->FillInfo(aInfo);
369 //GBK font lib also have single byte ASCII characters, set them here
370 for (PRUint16 SrcUnicode = 0x0000; SrcUnicode <= 0x007F; SrcUnicode++)
371 SET_REPRESENTABLE(aInfo, SrcUnicode);
372 SET_REPRESENTABLE(aInfo, 0x20ac); // euro
373 return NS_OK;