1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
23 * Pierre Phaneuf <pp@ludusdesign.com>
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
42 #include "nsIServiceManager.h"
43 #include "nsIComponentManager.h"
44 #include "nsICharsetConverterManager.h"
45 #include "nsSaveAsCharset.h"
47 #include "nsUnicharUtils.h"
48 #include "nsCompressedCharMap.h"
51 // nsISupports methods
// Invokes the NS_IMPL_ISUPPORTS1 macro for nsSaveAsCharset, naming
// nsISaveAsCharset as the one interface argument.
// NOTE(review): presumably this supplies the class's nsISupports methods; the
// macro itself is defined outside this file.
53 NS_IMPL_ISUPPORTS1(nsSaveAsCharset
, nsISaveAsCharset
)
// Constructor: starts with the attr_htmlTextDefault attribute set and a
// charset-list index of -1. GetCharset() treats a negative index as
// "Init() has not been called yet".
// NOTE(review): the embedded numbering skips lines here, so other member
// initialisation may exist that this listing does not show.
58 nsSaveAsCharset::nsSaveAsCharset()
60 mAttribute
= attr_htmlTextDefault
;
62 mCharsetListIndex
= -1;
// Destructor. NOTE(review): its body is not visible in this listing.
65 nsSaveAsCharset::~nsSaveAsCharset()
// Prepares the object for Convert().
//
// charset       - charset list string, handed to SetupCharsetList().
// attr          - attribute flags. NOTE(review): the statement that stores
//                 this value is not visible in this listing; presumably it is
//                 assigned to mAttribute before the mask test below.
// entityVersion - stored in mEntityVersion for later entity conversion.
//
// Any failure from SetupCharsetList() or SetupUnicodeEncoder() is returned
// through NS_ENSURE_SUCCESS. The final return statement is not visible here.
70 nsSaveAsCharset::Init(const char *charset
, PRUint32 attr
, PRUint32 entityVersion
)
75 mEntityVersion
= entityVersion
;
// Parse the charset list first so GetNextCharset() has something to return.
77 rv
= SetupCharsetList(charset
);
78 NS_ENSURE_SUCCESS(rv
, rv
);
80 // set up unicode encoder
// The encoder is created for the first charset in the freshly parsed list.
81 rv
= SetupUnicodeEncoder(GetNextCharset());
82 NS_ENSURE_SUCCESS(rv
, rv
);
84 // set up entity converter
// Created only when the entity bits of mAttribute are not attr_EntityNone and
// no converter exists yet, so an existing converter is kept.
85 if (attr_EntityNone
!= MASK_ENTITY(mAttribute
) && !mEntityConverter
)
86 mEntityConverter
= do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID
, &rv
);
// Converts a Unicode string to the configured charset.
//
// inString - source string; a null pointer yields NS_ERROR_NULL_POINTER.
// _retval  - receives the converted buffer via DoCharsetConversion(); a null
//            pointer yields NS_ERROR_NULL_POINTER.
//
// Requires a prior successful Init(): without mEncoder the call asserts and
// returns NS_ERROR_FAILURE.
//
// NOTE(review): several original lines are missing from this listing (the
// embedded numbering skips). Not visible: the condition guarding the
// NS_ERROR_ILLEGAL_VALUE return, the declaration of rv, the head of the loop
// closed by the "while" at the end, and the "else" that presumably separates
// the two DoCharsetConversion() calls.
92 nsSaveAsCharset::Convert(const PRUnichar
*inString
, char **_retval
)
94 if (nsnull
== _retval
)
95 return NS_ERROR_NULL_POINTER
;
96 if (nsnull
== inString
)
97 return NS_ERROR_NULL_POINTER
;
// NOTE(review): the test for this return is on a line not shown here.
99 return NS_ERROR_ILLEGAL_VALUE
;
102 NS_ASSERTION(mEncoder
, "need to call Init() before Convert()");
103 NS_ENSURE_TRUE(mEncoder
, NS_ERROR_FAILURE
);
107 // make sure to start from the first charset in the list
// An index above 0 means an earlier call advanced to a later charset, so the
// index is reset and the encoder rebuilt for the first entry.
108 if (mCharsetListIndex
> 0) {
109 mCharsetListIndex
= -1;
110 rv
= SetupUnicodeEncoder(GetNextCharset());
111 NS_ENSURE_SUCCESS(rv
, rv
);
115 // fallback to the next charset in the list if the last conversion failed by an unmapped character
116 if (MASK_CHARSET_FALLBACK(mAttribute
) && NS_ERROR_UENC_NOMAPPING
== rv
) {
117 const char * charset
= GetNextCharset();
// NOTE(review): original lines 118-119 are not visible; presumably they handle
// the case where the charset list is exhausted - confirm against the full file.
120 rv
= SetupUnicodeEncoder(charset
);
121 NS_ENSURE_SUCCESS(rv
, rv
);
// Entity-before-charset mode: convert to entities first, then run the charset
// conversion on the entity-expanded string and release that string.
125 if (attr_EntityBeforeCharsetConv
== MASK_ENTITY(mAttribute
)) {
126 NS_ASSERTION(mEntityConverter
, "need to call Init() before Convert()");
127 NS_ENSURE_TRUE(mEntityConverter
, NS_ERROR_FAILURE
);
128 PRUnichar
*entity
= nsnull
;
129 // do the entity conversion first
130 rv
= mEntityConverter
->ConvertToEntities(inString
, mEntityVersion
, &entity
);
131 if(NS_SUCCEEDED(rv
)) {
132 rv
= DoCharsetConversion(entity
, _retval
);
133 nsMemory::Free(entity
);
// Direct conversion of the caller's string (presumably the "else" branch).
137 rv
= DoCharsetConversion(inString
, _retval
);
// Repeat while charset fallback is enabled and the last attempt ended with an
// unmappable character.
139 } while (MASK_CHARSET_FALLBACK(mAttribute
) && NS_ERROR_UENC_NOMAPPING
== rv
);
// Returns a copy of the name of the charset currently selected in the list.
//
// aCharset - out parameter; checked with NS_ENSURE_ARG. On success it
//            receives a copy made with nsCRT::strdup().
//
// Returns NS_ERROR_FAILURE when mCharsetListIndex is negative (Init() not yet
// called), NS_ERROR_OUT_OF_MEMORY when the copy is null, otherwise NS_OK.
// NOTE(review): the string is newly allocated, so the caller presumably owns
// and must free it - confirm the matching deallocator.
145 nsSaveAsCharset::GetCharset(char * *aCharset
)
147 NS_ENSURE_ARG(aCharset
);
148 NS_ASSERTION(mCharsetListIndex
>= 0, "need to call Init() first");
149 NS_ENSURE_TRUE(mCharsetListIndex
>= 0, NS_ERROR_FAILURE
);
151 const char *charset
= mCharsetList
[mCharsetListIndex
]->get();
// NOTE(review): the condition guarding the next assertion and return is not
// visible; presumably it tests for a null charset.
154 NS_ASSERTION(charset
, "make sure to call Init() with non empty charset list");
155 return NS_ERROR_FAILURE
;
158 *aCharset
= nsCRT::strdup(charset
);
159 return (*aCharset
) ? NS_OK
: NS_ERROR_OUT_OF_MEMORY
;
162 /////////////////////////////////////////////////////////////////////////////////////////
164 // do the fallback, reallocate the buffer if necessary
165 // need to pass destination buffer info (size, current position and estimation of rest of the conversion)
// Appends the fallback text for one unmappable character to the output buffer.
//
// character       - code point handed to DoConversionFallBack().
// outString       - in/out: the output buffer, passed to PR_Realloc() when
//                   more room is needed.
// bufferLength    - in/out: allocated size of *outString; increased by the
//                   fallback length on the reallocation path.
// currentPos      - in/out: write offset into *outString; advanced by the
//                   number of bytes copied.
// estimatedLength - caller's estimate of the space still needed for the rest
//                   of the conversion.
//
// Returns NS_ERROR_NULL_POINTER if any of the three pointers is null and
// NS_ERROR_OUT_OF_MEMORY from the reallocation path. The remaining return
// statements are not visible in this listing.
167 nsSaveAsCharset::HandleFallBack(PRUint32 character
, char **outString
, PRInt32
*bufferLength
,
168 PRInt32
*currentPos
, PRInt32 estimatedLength
)
170 if((nsnull
== outString
) || (nsnull
== bufferLength
) ||(nsnull
==currentPos
))
171 return NS_ERROR_NULL_POINTER
;
// Scratch buffer for the fallback text; its size is passed to the helper.
172 char fallbackStr
[256];
173 nsresult rv
= DoConversionFallBack(character
, fallbackStr
, 256);
174 if (NS_SUCCEEDED(rv
)) {
175 PRInt32 tempLen
= (PRInt32
) PL_strlen(fallbackStr
);
177 // reallocate if the buffer is not large enough
// NOTE(review): the test includes estimatedLength, but the buffer grows only
// by tempLen. Confirm callers do not rely on estimatedLength bytes being free
// after this call.
178 if ((tempLen
+ estimatedLength
) >= (*bufferLength
- *currentPos
)) {
179 char *temp
= (char *) PR_Realloc(*outString
, *bufferLength
+ tempLen
);
181 // adjust length/pointer after realloc
182 *bufferLength
+= tempLen
;
// NOTE(review): the lines around this failure return are not visible; check
// that the original buffer is not leaked when PR_Realloc() fails.
187 return NS_ERROR_OUT_OF_MEMORY
;
// Copies exactly tempLen bytes, so no terminating NUL is written here.
190 memcpy((*outString
+ *currentPos
), fallbackStr
, tempLen
);
191 *currentPos
+= tempLen
;
// Encodes inString with mEncoder into a newly allocated buffer.
//
// inString  - source string; its length is taken with nsCRT::strlen().
// outString - receives the PR_Malloc()-allocated result buffer. A null
//             pointer yields NS_ERROR_NULL_POINTER.
//
// Flow, as far as this listing shows:
//   1. Ask the encoder for the maximum output length and allocate that plus
//      512 spare bytes for fallback text.
//   2. Convert in a loop. A result other than NS_ERROR_UENC_NOMAPPING ends the
//      loop. On NS_ERROR_UENC_NOMAPPING the encoder is finished, the
//      offending character is identified and HandleFallBack() writes its
//      replacement.
//   3. Finish the encoder once more, hand the buffer to the caller, and
//      report NS_ERROR_UENC_NOMAPPING if saveResult recorded one.
//
// NOTE(review): many original lines are missing (the embedded numbering
// skips). Not visible: the declarations of rv, dstLength, dstPtr, pos1 and
// pos2, every update of pos2, the assignment to saveResult, the error
// clean-up paths and the final return.
197 nsSaveAsCharset::DoCharsetConversion(const PRUnichar
*inString
, char **outString
)
199 if(nsnull
== outString
)
200 return NS_ERROR_NULL_POINTER
;
// This assertion follows the null return above, so it cannot fire here.
201 NS_ASSERTION(outString
, "invalid input");
206 PRInt32 inStringLength
= nsCRT::strlen(inString
); // original input string length
207 PRInt32 bufferLength
; // allocated buffer length
208 PRInt32 srcLength
= inStringLength
;
212 nsresult saveResult
= NS_OK
; // to remember NS_ERROR_UENC_NOMAPPING
214 // estimate and allocate the target buffer (reserve extra memory for fallback)
215 rv
= mEncoder
->GetMaxLength(inString
, inStringLength
, &dstLength
);
216 if (NS_FAILED(rv
)) return rv
;
218 bufferLength
= dstLength
+ 512; // reserve 512 byte for fallback.
219 dstPtr
= (char *) PR_Malloc(bufferLength
);
220 if (NULL
== dstPtr
) return NS_ERROR_OUT_OF_MEMORY
;
// pos1 indexes the input string and pos2 the output buffer. The for header
// has no increment; pos1 is advanced inside the body.
223 for (pos1
= 0, pos2
= 0; pos1
< inStringLength
;) {
224 // convert from unicode
// dstLength is set to the space left in the output buffer before the call.
225 dstLength
= bufferLength
- pos2
;
226 rv
= mEncoder
->Convert(&inString
[pos1
], &srcLength
, &dstPtr
[pos2
], &dstLength
);
// Advance by srcLength, or by one unit when it is zero, so pos1 always moves
// forward and pos1-1 below is a valid index.
228 pos1
+= srcLength
? srcLength
: 1;
232 // break: this is usually the case (no error) OR unrecoverable error
233 if (NS_ERROR_UENC_NOMAPPING
!= rv
) break;
235 // remember this happened and reset the result
239 // finish encoder, give it a chance to write extra data like escape sequences
240 dstLength
= bufferLength
- pos2
;
241 rv
= mEncoder
->Finish(&dstPtr
[pos2
], &dstLength
);
242 if (NS_SUCCEEDED(rv
)) {
// Remaining input length for the next Convert() call.
247 srcLength
= inStringLength
- pos1
;
250 if (!ATTR_NO_FALLBACK(mAttribute
)) {
251 PRUint32 unMappedChar
;
// The unmapped unit is at pos1-1. If it is a high surrogate directly followed
// by a low surrogate, the pair is combined into one UCS4 value.
// NOTE(review): original lines 255-256 are not visible; presumably the low
// surrogate is skipped there.
252 if (NS_IS_HIGH_SURROGATE(inString
[pos1
-1]) &&
253 inStringLength
> pos1
&& NS_IS_LOW_SURROGATE(inString
[pos1
])) {
254 unMappedChar
= SURROGATE_TO_UCS4(inString
[pos1
-1], inString
[pos1
]);
257 unMappedChar
= inString
[pos1
-1];
// Re-estimate the space needed for the rest of the input. HandleFallBack()
// uses that estimate when deciding whether to grow the buffer.
260 rv
= mEncoder
->GetMaxLength(inString
+pos1
, inStringLength
-pos1
, &dstLength
);
264 rv
= HandleFallBack(unMappedChar
, &dstPtr
, &bufferLength
, &pos2
, dstLength
);
271 if (NS_SUCCEEDED(rv
)) {
272 // finish encoder, give it a chance to write extra data like escape sequences
273 dstLength
= bufferLength
- pos2
;
274 rv
= mEncoder
->Finish(&dstPtr
[pos2
], &dstLength
);
275 if (NS_SUCCEEDED(rv
)) {
286 *outString
= dstPtr
; // set the result string
288 // set error code so that the caller can do own fall back
289 if (NS_ERROR_UENC_NOMAPPING
== saveResult
) {
290 rv
= NS_ERROR_UENC_NOMAPPING
;
// Writes the textual replacement for one unmappable character into outString.
//
// inUCS4       - the unmapped code point.
// outString    - destination buffer; a null pointer yields
//                NS_ERROR_NULL_POINTER.
// bufferLength - size of outString in bytes.
//
// When the entity bits of mAttribute are attr_EntityAfterCharsetConv, the
// entity converter is tried first. The switch on MASK_FALLBACK(mAttribute)
// then formats the character as a backslash-u escape, a decimal NCR or a hex
// NCR, or handles the question-mark and none modes.
//
// NOTE(review): several original lines are missing from this listing. Not
// visible: the body of the ATTR_NO_FALLBACK branch, the declaration of
// entity, the "else"/"break"/"default" lines of the switch, and the final
// return.
297 nsSaveAsCharset::DoConversionFallBack(PRUint32 inUCS4
, char *outString
, PRInt32 bufferLength
)
299 NS_ASSERTION(outString
, "invalid input");
300 if(nsnull
== outString
)
301 return NS_ERROR_NULL_POINTER
;
307 if (ATTR_NO_FALLBACK(mAttribute
)) {
310 if (attr_EntityAfterCharsetConv
== MASK_ENTITY(mAttribute
)) {
312 rv
= mEntityConverter
->ConvertUTF32ToEntity(inUCS4
, mEntityVersion
, &entity
);
313 if (NS_SUCCEEDED(rv
)) {
// NOTE(review): two issues are visible in this check.
//  - A non-null entity that is too long is returned from without being
//    freed; the nsMemory::Free() below is only reached after the copy.
//  - The test uses ">", so an entity of exactly bufferLength characters
//    passes, and PL_strcpy() then writes its terminator one byte past the
//    end of the buffer. ">=" looks like the intended comparison.
314 if (NULL
== entity
|| (PRInt32
)strlen(entity
) > bufferLength
) {
315 return NS_ERROR_OUT_OF_MEMORY
;
317 PL_strcpy(outString
, entity
);
318 nsMemory::Free(entity
);
323 switch (MASK_FALLBACK(mAttribute
)) {
324 case attr_FallbackQuestionMark
:
// Needs room for at least two bytes. The lines that write the output are not
// visible in this listing.
325 if(bufferLength
>=2) {
330 rv
= NS_ERROR_FAILURE
;
333 case attr_FallbackEscapeU
:
// Code points with bits set in 0xff0000 are printed with six hex digits,
// all others with four.
334 if (inUCS4
& 0xff0000)
335 rv
= (PR_snprintf(outString
, bufferLength
, "\\u%.6x", inUCS4
) > 0) ? NS_OK
: NS_ERROR_FAILURE
;
337 rv
= (PR_snprintf(outString
, bufferLength
, "\\u%.4x", inUCS4
) > 0) ? NS_OK
: NS_ERROR_FAILURE
;
339 case attr_FallbackDecimalNCR
:
340 rv
= ( PR_snprintf(outString
, bufferLength
, "&#%u;", inUCS4
) > 0) ? NS_OK
: NS_ERROR_FAILURE
;
342 case attr_FallbackHexNCR
:
343 rv
= (PR_snprintf(outString
, bufferLength
, "&#x%x;", inUCS4
) > 0) ? NS_OK
: NS_ERROR_FAILURE
;
345 case attr_FallbackNone
:
// NOTE(review): presumably the assignment below belongs to a "default:" label
// on a line not shown here - confirm against the full file.
349 rv
= NS_ERROR_ILLEGAL_VALUE
;
// Replaces mEncoder with a Unicode encoder for the given charset.
//
// charset - charset name; checked with NS_ENSURE_ARG.
//
// Obtains the charset converter manager service and returns the result of its
// GetUnicodeEncoder() call. A failure to get the service is returned as is.
// NOTE(review): the declaration of rv is on a line not visible here.
356 nsresult
nsSaveAsCharset::SetupUnicodeEncoder(const char* charset
)
358 NS_ENSURE_ARG(charset
);
361 // set up unicode encoder
362 nsCOMPtr
<nsICharsetConverterManager
> ccm
= do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID
, &rv
);
363 NS_ENSURE_SUCCESS(rv
, rv
);
365 return ccm
->GetUnicodeEncoder(charset
, getter_AddRefs(mEncoder
));
// Parses a charset list string into mCharsetList.
//
// charsetList - list of charset names; checked with NS_ENSURE_ARG and
//               asserted to be non-empty. It is split by ParseString() using
//               the delimiter string ", ".
//
// A list left over from an earlier call (index >= 0) is cleared and the index
// reset to -1 before parsing.
// NOTE(review): the condition guarding the NS_ERROR_INVALID_ARG return and
// the final return are on lines not visible here; presumably the guard
// rejects an empty string.
368 nsresult
nsSaveAsCharset::SetupCharsetList(const char *charsetList
)
370 NS_ENSURE_ARG(charsetList
);
372 NS_ASSERTION(charsetList
[0], "charsetList should not be empty");
374 return NS_ERROR_INVALID_ARG
;
376 if (mCharsetListIndex
>= 0) {
377 mCharsetList
.Clear();
378 mCharsetListIndex
= -1;
381 mCharsetList
.ParseString(charsetList
, ", ");
// Advances mCharsetListIndex by one and returns the charset name at the new
// position.
// NOTE(review): the statement run when the list is exhausted (index + 1 >=
// Count()) is on a line not visible here; presumably it returns null, so
// callers should be prepared for that.
386 const char * nsSaveAsCharset::GetNextCharset()
388 if ((mCharsetListIndex
+ 1) >= mCharsetList
.Count())
391 // bump the index and return the next charset
392 return mCharsetList
[++mCharsetListIndex
]->get();
395 /////////////////////////////////////////////////////////////////////////////////////////
// Factory function: allocates a new nsSaveAsCharset and stores it in *inst,
// cast to nsISupports.
//
// Returns NS_OK when the allocation yields a non-null pointer and
// NS_ERROR_OUT_OF_MEMORY otherwise.
// NOTE(review): the condition guarding the NS_ERROR_NULL_POINTER return is not
// visible here; presumably it tests inst for null. Original lines 403-404 are
// also missing, so any reference-count handling cannot be confirmed.
398 NS_NewSaveAsCharset(nsISupports
**inst
)
401 return NS_ERROR_NULL_POINTER
;
402 *inst
= (nsISupports
*) new nsSaveAsCharset
;
405 return (*inst
) ? NS_OK
: NS_ERROR_OUT_OF_MEMORY
;