1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is Mozilla Communicator client code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
38 //----------------------------------------------------------------------
39 // Global functions and data [declaration]
40 #include "nsUCSupport.h"
41 #include "nsUnicodeToUTF8.h"
// Expands the NS_IMPL_ISUPPORTS1 macro for nsUnicodeToUTF8, passing
// nsIUnicodeEncoder as the single interface argument.
44 NS_IMPL_ISUPPORTS1(nsUnicodeToUTF8
, nsIUnicodeEncoder
)
46 //----------------------------------------------------------------------
47 // nsUnicodeToUTF8 class [implementation]
// Reports an upper bound on the number of output bytes a conversion may
// need by storing 3*aSrcLength + 3 through aDestLength.
// NOTE(review): the declaration of aSrcLength is not visible in this
// listing; if it is a signed 32-bit integer, 3*aSrcLength + 3 can overflow
// for very large inputs -- confirm that callers bound the length.
49 NS_IMETHODIMP
nsUnicodeToUTF8::GetMaxLength(const PRUnichar
* aSrc
,
51 PRInt32
* aDestLength
)
53 // aSrc is interpreted as UTF-16; 3 bytes per code unit is normally enough.
54 // But when the previous buffer only contains part of a surrogate pair, we
55 // need to complete it here. If the first word in the following buffer is not
56 // in the valid surrogate range, we need to convert the remainder of the last buffer
// (the trailing "+ 3" below presumably reserves room for that leftover
// surrogate, which Convert emits as a 3-byte sequence -- TODO confirm)
58 *aDestLength
= 3*aSrcLength
+ 3;
// Fills the buffer pointed to by aInfo with 0xFF bytes. The byte count is
// 0x10000 >> 3 = 8192, i.e. one bit for each of 0x10000 values.
// NOTE(review): presumably this is a bitmap marking every 16-bit code unit
// as representable -- confirm against the nsIUnicodeEncoder contract.
62 NS_IMETHODIMP
nsUnicodeToUTF8::FillInfo(PRUint32
*aInfo
)
64 memset(aInfo
, 0xFF, (0x10000L
>> 3));
// Converts the UTF-16 code units in [aSrc, aSrc + *aSrcLength) to UTF-8
// bytes written through the `dest` cursor.
//
// Return paths visible in this listing:
//   - NS_OK_UENC_MOREINPUT  when the input ends right after a high
//     surrogate; that unit is saved in mHighSurrogate for a later call.
//   - NS_OK_UENC_MOREOUTPUT when the output buffer is too small; on the final
//     exit path *aSrcLength and *aDestLength are set to the counts
//     consumed / produced so far.
//
// NOTE(review): this listing omits several original lines (braces, the
// declarations of `dest` and `n`, the bounds checks guarding each
// `goto error_more_output`, the label itself, and the success return).
// The comments below describe only what is still visible.
68 NS_IMETHODIMP
nsUnicodeToUTF8::Convert(const PRUnichar
* aSrc
,
71 PRInt32
* aDestLength
)
// Read cursor and one-past-the-end sentinel for the input.
73 const PRUnichar
* src
= aSrc
;
74 const PRUnichar
* srcEnd
= aSrc
+ *aSrcLength
;
// Snapshot of the caller-supplied output capacity.
76 PRInt32 destLen
= *aDestLength
;
79 //complete remaining of last conversion
// NOTE(review): the guards for this section are not visible; presumably it
// runs only when mHighSurrogate holds a pending unit, and the early
// MOREINPUT return covers an empty input buffer -- confirm.
83 return NS_OK_UENC_MOREINPUT
;
// Up to 4 bytes may be written below, so require at least that much room.
85 if (*aDestLength
< 4) {
88 return NS_OK_UENC_MOREOUTPUT
;
// The next unit is outside 0xDC00..0xDFFF: the pending high surrogate is
// unpaired, so it is written on its own as a 3-byte sequence.
90 if (*src
< (PRUnichar
)0xdc00 || *src
> (PRUnichar
)0xdfff) { //not a pair
91 *dest
++ = (char)0xe0 | (mHighSurrogate
>> 12);
92 *dest
++ = (char)0x80 | ((mHighSurrogate
>> 6) & 0x003f);
93 *dest
++ = (char)0x80 | (mHighSurrogate
& 0x003f);
// Otherwise combine the pending high surrogate with the low surrogate at
// *src into a code point at or above 0x10000 and write it as 4 bytes.
96 n
= ((mHighSurrogate
- (PRUnichar
)0xd800) << 10) +
97 (*src
- (PRUnichar
)0xdc00) + 0x10000;
98 *dest
++ = (char)0xf0 | (n
>> 18);
99 *dest
++ = (char)0x80 | ((n
>> 12) & 0x3f);
100 *dest
++ = (char)0x80 | ((n
>> 6) & 0x3f);
101 *dest
++ = (char)0x80 | (n
& 0x3f);
// Main loop over the remaining input code units.
108 while (src
< srcEnd
) {
// Values up to 0x7F are copied as a single byte.
109 if ( *src
<= 0x007f) {
// (the capacity check guarding this goto is not visible in the listing)
111 goto error_more_output
;
112 *dest
++ = (char)*src
;
// Values up to 0x7FF become a 2-byte sequence (0xC0 lead, 0x80 trail).
114 } else if (*src
<= 0x07ff) {
116 goto error_more_output
;
117 *dest
++ = (char)0xc0 | (*src
>> 6);
118 *dest
++ = (char)0x80 | (*src
& 0x003f);
// High surrogate (0xD800..0xDBFF): needs the following unit to decide.
120 } else if (*src
>= (PRUnichar
)0xD800 && *src
< (PRUnichar
)0xDC00) {
// The high surrogate is the last unit of this buffer: remember it, report
// the bytes written so far, and ask the caller for more input.
121 if ((src
+1) >= srcEnd
) {
122 //we need another surrogate to complete this unicode char
123 mHighSurrogate
= *src
;
124 *aDestLength
= dest
- aDest
;
125 return NS_OK_UENC_MOREINPUT
;
129 goto error_more_output
;
// The following unit is not a low surrogate: write the lone high surrogate
// as a 3-byte sequence.
130 if (*(src
+1) < (PRUnichar
)0xdc00 || *(src
+1) > 0xdfff) { //not a pair
131 *dest
++ = (char)0xe0 | (*src
>> 12);
132 *dest
++ = (char)0x80 | ((*src
>> 6) & 0x003f);
133 *dest
++ = (char)0x80 | (*src
& 0x003f);
// Valid pair: build the code point from both units and write 4 bytes.
136 n
= ((*src
- (PRUnichar
)0xd800) << 10) + (*(src
+1) - (PRUnichar
)0xdc00) + (PRUint32
)0x10000;
137 *dest
++ = (char)0xf0 | (n
>> 18);
138 *dest
++ = (char)0x80 | ((n
>> 12) & 0x3f);
139 *dest
++ = (char)0x80 | ((n
>> 6) & 0x3f);
140 *dest
++ = (char)0x80 | (n
& 0x3f);
146 goto error_more_output
;
147 //treat rest of the character as BMP
// Every other value is written as a 3-byte sequence.
148 *dest
++ = (char)0xe0 | (*src
>> 12);
149 *dest
++ = (char)0x80 | ((*src
>> 6) & 0x003f);
150 *dest
++ = (char)0x80 | (*src
& 0x003f);
// Report the number of bytes written (dest cursor minus buffer start).
156 *aDestLength
= dest
- aDest
;
// NOTE(review): presumably the error_more_output exit path (the label line
// is not visible): report units consumed and bytes produced, then ask the
// caller for a larger output buffer.
160 *aSrcLength
= src
- aSrc
;
161 *aDestLength
= dest
- aDest
;
162 return NS_OK_UENC_MOREOUTPUT
;
165 NS_IMETHODIMP
nsUnicodeToUTF8::Finish(char * aDest
, PRInt32
* aDestLength
)
169 if (mHighSurrogate
) {
170 if (*aDestLength
< 3) {
172 return NS_OK_UENC_MOREOUTPUT
;
174 *dest
++ = (char)0xe0 | (mHighSurrogate
>> 12);
175 *dest
++ = (char)0x80 | ((mHighSurrogate
>> 6) & 0x003f);
176 *dest
++ = (char)0x80 | (mHighSurrogate
& 0x003f);