Bug 454376 add -lCrun -lCstd for Solaris OS_LIBS, r=bsmedberg
[wine-gecko.git] / intl / uconv / ucvcn / nsGBKToUnicode.cpp
blob186f738edd4b01f9d8cc0ca383c623e3490b1dbd
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
/**
 * A character set converter from GBK to Unicode.
 *
 * @created         07/Sept/1999
 * @author  Yueheng Xu, Yueheng.Xu@intel.com
 */
45 #include "nsGBKToUnicode.h"
46 #include "nsUCvCnDll.h"
47 #include "gbku.h"
50 //------------------------------------------------------------
51 // nsGBKUnique2BytesToUnicode
52 //------------------------------------------------------------
53 class nsGBKUnique2BytesToUnicode : public nsTableDecoderSupport
55 public:
56 nsGBKUnique2BytesToUnicode();
57 virtual ~nsGBKUnique2BytesToUnicode()
58 { }
59 protected:
62 static const PRUint16 g_utGBKUnique2Bytes[] = {
63 #include "gbkuniq2b.ut"
65 nsGBKUnique2BytesToUnicode::nsGBKUnique2BytesToUnicode()
66 : nsTableDecoderSupport(u2BytesCharset, nsnull,
67 (uMappingTable*) &g_utGBKUnique2Bytes, 1)
71 //------------------------------------------------------------
72 // nsGB18030Unique2BytesToUnicode
73 //------------------------------------------------------------
74 class nsGB18030Unique2BytesToUnicode : public nsTableDecoderSupport
76 public:
77 nsGB18030Unique2BytesToUnicode();
78 virtual ~nsGB18030Unique2BytesToUnicode()
79 { }
80 protected:
83 static const PRUint16 g_utGB18030Unique2Bytes[] = {
84 #include "gb18030uniq2b.ut"
86 nsGB18030Unique2BytesToUnicode::nsGB18030Unique2BytesToUnicode()
87 : nsTableDecoderSupport(u2BytesCharset, nsnull,
88 (uMappingTable*) &g_utGB18030Unique2Bytes, 1)
92 //------------------------------------------------------------
93 // nsGB18030Unique4BytesToUnicode
94 //------------------------------------------------------------
95 class nsGB18030Unique4BytesToUnicode : public nsTableDecoderSupport
97 public:
98 nsGB18030Unique4BytesToUnicode();
99 virtual ~nsGB18030Unique4BytesToUnicode()
101 protected:
104 static const PRUint16 g_utGB18030Unique4Bytes[] = {
105 #include "gb180304bytes.ut"
107 nsGB18030Unique4BytesToUnicode::nsGB18030Unique4BytesToUnicode()
108 : nsTableDecoderSupport(u4BytesGB18030Charset, nsnull,
109 (uMappingTable*) &g_utGB18030Unique4Bytes, 1)
114 //----------------------------------------------------------------------
115 // Class nsGBKToUnicode [implementation]
117 //----------------------------------------------------------------------
118 // Subclassing of nsTablesDecoderSupport class [implementation]
// Byte-range predicates used while scanning GBK / GB18030 input.
// Each one wraps UINT8_IN_RANGE(low, byte, high).

// Lead byte of any multi-byte sequence: 0x81..0xFE.
#define LEGAL_GBK_MULTIBYTE_FIRST_BYTE(c) (UINT8_IN_RANGE(0x81, (c), 0xFE))

// Lead byte of a four-byte sequence that is decoded to a surrogate pair
// rather than through the four-byte table: 0x90..0xFE.
#define FIRST_BYTE_IS_SURROGATE(c) (UINT8_IN_RANGE(0x90, (c), 0xFE))

// Second byte of a two-byte sequence: 0x40..0x7E or 0x80..0xFE.
#define LEGAL_GBK_2BYTE_SECOND_BYTE(c) (UINT8_IN_RANGE(0x40, (c), 0x7E)|| UINT8_IN_RANGE(0x80, (c), 0xFE))

// Second, third and fourth bytes of a four-byte sequence:
// 0x30..0x39, 0x81..0xFE and 0x30..0x39 respectively.
#define LEGAL_GBK_4BYTE_SECOND_BYTE(c) (UINT8_IN_RANGE(0x30, (c), 0x39))
#define LEGAL_GBK_4BYTE_THIRD_BYTE(c) (UINT8_IN_RANGE(0x81, (c), 0xFE))
#define LEGAL_GBK_4BYTE_FORTH_BYTE(c) (UINT8_IN_RANGE(0x30, (c), 0x39))
133 NS_IMETHODIMP nsGBKToUnicode::ConvertNoBuff(const char* aSrc,
134 PRInt32 * aSrcLength,
135 PRUnichar *aDest,
136 PRInt32 * aDestLength)
138 PRInt32 i=0;
139 PRInt32 iSrcLength = (*aSrcLength);
140 PRInt32 iDestlen = 0;
141 nsresult rv=NS_OK;
142 *aSrcLength = 0;
144 for (i=0;i<iSrcLength;i++)
146 if ( iDestlen >= (*aDestLength) )
148 rv = NS_OK_UDEC_MOREOUTPUT;
149 break;
151 // The valid range for the 1st byte is [0x81,0xFE]
152 if(LEGAL_GBK_MULTIBYTE_FIRST_BYTE(*aSrc))
154 if(i+1 >= iSrcLength)
156 rv = NS_OK_UDEC_MOREINPUT;
157 break;
159 // To make sure, the second byte has to be checked as well.
160 // In GBK, the second byte range is [0x40,0x7E] and [0x80,0XFE]
161 if(LEGAL_GBK_2BYTE_SECOND_BYTE(aSrc[1]))
163 // Valid GBK code
164 *aDest = mUtil.GBKCharToUnicode(aSrc[0], aSrc[1]);
165 if(UCS2_NO_MAPPING == *aDest)
167 // We cannot map in the common mapping, let's call the
168 // delegate 2 byte decoder to decode the gbk or gb18030 unique
169 // 2 byte mapping
170 if(! TryExtensionDecoder(aSrc, aDest))
172 *aDest = UCS2_NO_MAPPING;
175 aSrc += 2;
176 i++;
178 else if (LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]))
180 // from the first 2 bytes, it looks like a 4 byte GB18030
181 if(i+3 >= iSrcLength) // make sure we got 4 bytes
183 rv = NS_OK_UDEC_MOREINPUT;
184 break;
186 // 4 bytes patten
187 // [0x81-0xfe][0x30-0x39][0x81-0xfe][0x30-0x39]
188 // preset the
190 if (LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]) &&
191 LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]))
193 if ( ! FIRST_BYTE_IS_SURROGATE(aSrc[0]))
195 // let's call the delegated 4 byte gb18030 converter to convert it
196 if(! Try4BytesDecoder(aSrc, aDest))
197 *aDest = UCS2_NO_MAPPING;
198 } else {
199 // let's try supplement mapping
200 NS_ASSERTION(( (iDestlen+1) <= (*aDestLength) ), "no enouth output memory");
201 if ( (iDestlen+1) <= (*aDestLength) )
203 if(DecodeToSurrogate(aSrc, aDest))
205 // surrogte two PRUnichar
206 iDestlen++;
207 aDest++;
208 } else {
209 *aDest = UCS2_NO_MAPPING;
211 } else {
212 *aDest = UCS2_NO_MAPPING;
215 } else {
216 *aDest = UCS2_NO_MAPPING;
218 aSrc += 4;
219 i+=3;
221 else if ((PRUint8) aSrc[0] == (PRUint8)0xA0 )
223 // stand-alone (not followed by a valid second byte) 0xA0 !
224 // treat it as valid a la Netscape 4.x
225 *aDest = CAST_CHAR_TO_UNICHAR(*aSrc);
226 aSrc++;
227 } else {
228 // Invalid GBK code point (second byte should be 0x40 or higher)
229 *aDest = UCS2_NO_MAPPING;
230 aSrc++;
232 } else {
233 if(IS_ASCII(*aSrc))
235 // The source is an ASCII
236 *aDest = CAST_CHAR_TO_UNICHAR(*aSrc);
237 aSrc++;
238 } else {
239 if(IS_GBK_EURO(*aSrc)) {
240 *aDest = UCS2_EURO;
241 } else {
242 *aDest = UCS2_NO_MAPPING;
244 aSrc++;
247 iDestlen++;
248 aDest++;
249 *aSrcLength = i+1;
251 *aDestLength = iDestlen;
252 return rv;
256 void nsGBKToUnicode::CreateExtensionDecoder()
258 mExtensionDecoder = new nsGBKUnique2BytesToUnicode();
260 void nsGBKToUnicode::Create4BytesDecoder()
262 m4BytesDecoder = nsnull;
264 void nsGB18030ToUnicode::CreateExtensionDecoder()
266 mExtensionDecoder = new nsGB18030Unique2BytesToUnicode();
268 void nsGB18030ToUnicode::Create4BytesDecoder()
270 m4BytesDecoder = new nsGB18030Unique4BytesToUnicode();
272 PRBool nsGB18030ToUnicode::DecodeToSurrogate(const char* aSrc, PRUnichar* aOut)
274 NS_ASSERTION(FIRST_BYTE_IS_SURROGATE(aSrc[0]), "illegal first byte");
275 NS_ASSERTION(LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]), "illegal second byte");
276 NS_ASSERTION(LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]), "illegal third byte");
277 NS_ASSERTION(LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]), "illegal forth byte");
278 if(! FIRST_BYTE_IS_SURROGATE(aSrc[0]))
279 return PR_FALSE;
280 if(! LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]))
281 return PR_FALSE;
282 if(! LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]))
283 return PR_FALSE;
284 if(! LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]))
285 return PR_FALSE;
287 PRUint8 a1 = (PRUint8) aSrc[0];
288 PRUint8 a2 = (PRUint8) aSrc[1];
289 PRUint8 a3 = (PRUint8) aSrc[2];
290 PRUint8 a4 = (PRUint8) aSrc[3];
291 a1 -= (PRUint8)0x90;
292 a2 -= (PRUint8)0x30;
293 a3 -= (PRUint8)0x81;
294 a4 -= (PRUint8)0x30;
295 PRUint32 idx = (((a1 * 10 + a2 ) * 126 + a3) * 10) + a4;
297 *aOut++ = 0xD800 | (0x000003FF & (idx >> 10));
298 *aOut = 0xDC00 | (0x000003FF & idx);
300 return PR_TRUE;
302 PRBool nsGBKToUnicode::TryExtensionDecoder(const char* aSrc, PRUnichar* aOut)
304 if(!mExtensionDecoder)
305 CreateExtensionDecoder();
306 NS_ASSERTION(mExtensionDecoder, "cannot creqte 2 bytes unique converter");
307 if(mExtensionDecoder)
309 nsresult res = mExtensionDecoder->Reset();
310 NS_ASSERTION(NS_SUCCEEDED(res), "2 bytes unique conversoin reset failed");
311 PRInt32 len = 2;
312 PRInt32 dstlen = 1;
313 res = mExtensionDecoder->Convert(aSrc,&len, aOut, &dstlen);
314 NS_ASSERTION(NS_FAILED(res) || ((len==2) && (dstlen == 1)),
315 "some strange conversion result");
316 // if we failed, we then just use the 0xfffd
317 // therefore, we ignore the res here.
318 if(NS_SUCCEEDED(res))
319 return PR_TRUE;
321 return PR_FALSE;
323 PRBool nsGBKToUnicode::DecodeToSurrogate(const char* aSrc, PRUnichar* aOut)
325 return PR_FALSE;
327 PRBool nsGBKToUnicode::Try4BytesDecoder(const char* aSrc, PRUnichar* aOut)
329 if(!m4BytesDecoder)
330 Create4BytesDecoder();
331 if(m4BytesDecoder)
333 nsresult res = m4BytesDecoder->Reset();
334 NS_ASSERTION(NS_SUCCEEDED(res), "4 bytes unique conversoin reset failed");
335 PRInt32 len = 4;
336 PRInt32 dstlen = 1;
337 res = m4BytesDecoder->Convert(aSrc,&len, aOut, &dstlen);
338 NS_ASSERTION(NS_FAILED(res) || ((len==4) && (dstlen == 1)),
339 "some strange conversion result");
340 // if we failed, we then just use the 0xfffd
341 // therefore, we ignore the res here.
342 if(NS_SUCCEEDED(res))
343 return PR_TRUE;
345 return PR_FALSE;