Investigating leaks in bug 463263, backout bug 453403.
[wine-gecko.git] / intl / unicharutil / src / nsSaveAsCharset.cpp
blob823d3709a4bdcd76edfd340e6eee1dddbbdf9d69
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
23 * Pierre Phaneuf <pp@ludusdesign.com>
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
40 #include "prmem.h"
41 #include "prprf.h"
42 #include "nsIServiceManager.h"
43 #include "nsIComponentManager.h"
44 #include "nsICharsetConverterManager.h"
45 #include "nsSaveAsCharset.h"
46 #include "nsCRT.h"
47 #include "nsUnicharUtils.h"
48 #include "nsCompressedCharMap.h"
51 // nsISupports methods
53 NS_IMPL_ISUPPORTS1(nsSaveAsCharset, nsISaveAsCharset)
56 // nsSaveAsCharset
58 nsSaveAsCharset::nsSaveAsCharset()
60 mAttribute = attr_htmlTextDefault;
61 mEntityVersion = 0;
62 mCharsetListIndex = -1;
65 nsSaveAsCharset::~nsSaveAsCharset()
69 NS_IMETHODIMP
70 nsSaveAsCharset::Init(const char *charset, PRUint32 attr, PRUint32 entityVersion)
72 nsresult rv = NS_OK;
74 mAttribute = attr;
75 mEntityVersion = entityVersion;
77 rv = SetupCharsetList(charset);
78 NS_ENSURE_SUCCESS(rv, rv);
80 // set up unicode encoder
81 rv = SetupUnicodeEncoder(GetNextCharset());
82 NS_ENSURE_SUCCESS(rv, rv);
84 // set up entity converter
85 if (attr_EntityNone != MASK_ENTITY(mAttribute) && !mEntityConverter)
86 mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID, &rv);
88 return rv;
91 NS_IMETHODIMP
92 nsSaveAsCharset::Convert(const PRUnichar *inString, char **_retval)
94 if (nsnull == _retval)
95 return NS_ERROR_NULL_POINTER;
96 if (nsnull == inString)
97 return NS_ERROR_NULL_POINTER;
98 if (0 == *inString)
99 return NS_ERROR_ILLEGAL_VALUE;
100 nsresult rv = NS_OK;
102 NS_ASSERTION(mEncoder, "need to call Init() before Convert()");
103 NS_ENSURE_TRUE(mEncoder, NS_ERROR_FAILURE);
105 *_retval = nsnull;
107 // make sure to start from the first charset in the list
108 if (mCharsetListIndex > 0) {
109 mCharsetListIndex = -1;
110 rv = SetupUnicodeEncoder(GetNextCharset());
111 NS_ENSURE_SUCCESS(rv, rv);
114 do {
115 // fallback to the next charset in the list if the last conversion failed by an unmapped character
116 if (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv) {
117 const char * charset = GetNextCharset();
118 if (!charset)
119 break;
120 rv = SetupUnicodeEncoder(charset);
121 NS_ENSURE_SUCCESS(rv, rv);
122 PR_FREEIF(*_retval);
125 if (attr_EntityBeforeCharsetConv == MASK_ENTITY(mAttribute)) {
126 NS_ASSERTION(mEntityConverter, "need to call Init() before Convert()");
127 NS_ENSURE_TRUE(mEntityConverter, NS_ERROR_FAILURE);
128 PRUnichar *entity = nsnull;
129 // do the entity conversion first
130 rv = mEntityConverter->ConvertToEntities(inString, mEntityVersion, &entity);
131 if(NS_SUCCEEDED(rv)) {
132 rv = DoCharsetConversion(entity, _retval);
133 nsMemory::Free(entity);
136 else
137 rv = DoCharsetConversion(inString, _retval);
139 } while (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv);
141 return rv;
144 NS_IMETHODIMP
145 nsSaveAsCharset::GetCharset(char * *aCharset)
147 NS_ENSURE_ARG(aCharset);
148 NS_ASSERTION(mCharsetListIndex >= 0, "need to call Init() first");
149 NS_ENSURE_TRUE(mCharsetListIndex >= 0, NS_ERROR_FAILURE);
151 const char *charset = mCharsetList[mCharsetListIndex]->get();
152 if (!charset) {
153 *aCharset = nsnull;
154 NS_ASSERTION(charset, "make sure to call Init() with non empty charset list");
155 return NS_ERROR_FAILURE;
158 *aCharset = nsCRT::strdup(charset);
159 return (*aCharset) ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
162 /////////////////////////////////////////////////////////////////////////////////////////
164 // do the fallback, reallocate the buffer if necessary
165 // need to pass destination buffer info (size, current position and estimation of rest of the conversion)
166 NS_IMETHODIMP
167 nsSaveAsCharset::HandleFallBack(PRUint32 character, char **outString, PRInt32 *bufferLength,
168 PRInt32 *currentPos, PRInt32 estimatedLength)
170 if((nsnull == outString ) || (nsnull == bufferLength) ||(nsnull ==currentPos))
171 return NS_ERROR_NULL_POINTER;
172 char fallbackStr[256];
173 nsresult rv = DoConversionFallBack(character, fallbackStr, 256);
174 if (NS_SUCCEEDED(rv)) {
175 PRInt32 tempLen = (PRInt32) PL_strlen(fallbackStr);
177 // reallocate if the buffer is not large enough
178 if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) {
179 char *temp = (char *) PR_Realloc(*outString, *bufferLength + tempLen);
180 if (NULL != temp) {
181 // adjust length/pointer after realloc
182 *bufferLength += tempLen;
183 *outString = temp;
184 } else {
185 *outString = NULL;
186 *bufferLength =0;
187 return NS_ERROR_OUT_OF_MEMORY;
190 memcpy((*outString + *currentPos), fallbackStr, tempLen);
191 *currentPos += tempLen;
193 return rv;
196 NS_IMETHODIMP
197 nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString)
199 if(nsnull == outString )
200 return NS_ERROR_NULL_POINTER;
201 NS_ASSERTION(outString, "invalid input");
203 *outString = NULL;
205 nsresult rv;
206 PRInt32 inStringLength = nsCRT::strlen(inString); // original input string length
207 PRInt32 bufferLength; // allocated buffer length
208 PRInt32 srcLength = inStringLength;
209 PRInt32 dstLength;
210 char *dstPtr = NULL;
211 PRInt32 pos1, pos2;
212 nsresult saveResult = NS_OK; // to remember NS_ERROR_UENC_NOMAPPING
214 // estimate and allocate the target buffer (reserve extra memory for fallback)
215 rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
216 if (NS_FAILED(rv)) return rv;
218 bufferLength = dstLength + 512; // reserve 512 byte for fallback.
219 dstPtr = (char *) PR_Malloc(bufferLength);
220 if (NULL == dstPtr) return NS_ERROR_OUT_OF_MEMORY;
223 for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
224 // convert from unicode
225 dstLength = bufferLength - pos2;
226 rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);
228 pos1 += srcLength ? srcLength : 1;
229 pos2 += dstLength;
230 dstPtr[pos2] = '\0';
232 // break: this is usually the case (no error) OR unrecoverable error
233 if (NS_ERROR_UENC_NOMAPPING != rv) break;
235 // remember this happened and reset the result
236 saveResult = rv;
237 rv = NS_OK;
239 // finish encoder, give it a chance to write extra data like escape sequences
240 dstLength = bufferLength - pos2;
241 rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
242 if (NS_SUCCEEDED(rv)) {
243 pos2 += dstLength;
244 dstPtr[pos2] = '\0';
247 srcLength = inStringLength - pos1;
249 // do the fallback
250 if (!ATTR_NO_FALLBACK(mAttribute)) {
251 PRUint32 unMappedChar;
252 if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) &&
253 inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) {
254 unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);
255 pos1++;
256 } else {
257 unMappedChar = inString[pos1-1];
260 rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
261 if (NS_FAILED(rv))
262 break;
264 rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
265 if (NS_FAILED(rv))
266 break;
267 dstPtr[pos2] = '\0';
271 if (NS_SUCCEEDED(rv)) {
272 // finish encoder, give it a chance to write extra data like escape sequences
273 dstLength = bufferLength - pos2;
274 rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
275 if (NS_SUCCEEDED(rv)) {
276 pos2 += dstLength;
277 dstPtr[pos2] = '\0';
281 if (NS_FAILED(rv)) {
282 PR_FREEIF(dstPtr);
283 return rv;
286 *outString = dstPtr; // set the result string
288 // set error code so that the caller can do own fall back
289 if (NS_ERROR_UENC_NOMAPPING == saveResult) {
290 rv = NS_ERROR_UENC_NOMAPPING;
293 return rv;
296 NS_IMETHODIMP
297 nsSaveAsCharset::DoConversionFallBack(PRUint32 inUCS4, char *outString, PRInt32 bufferLength)
299 NS_ASSERTION(outString, "invalid input");
300 if(nsnull == outString )
301 return NS_ERROR_NULL_POINTER;
303 *outString = '\0';
305 nsresult rv = NS_OK;
307 if (ATTR_NO_FALLBACK(mAttribute)) {
308 return NS_OK;
310 if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) {
311 char *entity = NULL;
312 rv = mEntityConverter->ConvertUTF32ToEntity(inUCS4, mEntityVersion, &entity);
313 if (NS_SUCCEEDED(rv)) {
314 if (NULL == entity || (PRInt32)strlen(entity) > bufferLength) {
315 return NS_ERROR_OUT_OF_MEMORY;
317 PL_strcpy(outString, entity);
318 nsMemory::Free(entity);
319 return rv;
323 switch (MASK_FALLBACK(mAttribute)) {
324 case attr_FallbackQuestionMark:
325 if(bufferLength>=2) {
326 *outString++='?';
327 *outString='\0';
328 rv = NS_OK;
329 } else {
330 rv = NS_ERROR_FAILURE;
332 break;
333 case attr_FallbackEscapeU:
334 if (inUCS4 & 0xff0000)
335 rv = (PR_snprintf(outString, bufferLength, "\\u%.6x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
336 else
337 rv = (PR_snprintf(outString, bufferLength, "\\u%.4x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
338 break;
339 case attr_FallbackDecimalNCR:
340 rv = ( PR_snprintf(outString, bufferLength, "&#%u;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
341 break;
342 case attr_FallbackHexNCR:
343 rv = (PR_snprintf(outString, bufferLength, "&#x%x;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
344 break;
345 case attr_FallbackNone:
346 rv = NS_OK;
347 break;
348 default:
349 rv = NS_ERROR_ILLEGAL_VALUE;
350 break;
353 return rv;
356 nsresult nsSaveAsCharset::SetupUnicodeEncoder(const char* charset)
358 NS_ENSURE_ARG(charset);
359 nsresult rv;
361 // set up unicode encoder
362 nsCOMPtr <nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
363 NS_ENSURE_SUCCESS(rv, rv);
365 return ccm->GetUnicodeEncoder(charset, getter_AddRefs(mEncoder));
368 nsresult nsSaveAsCharset::SetupCharsetList(const char *charsetList)
370 NS_ENSURE_ARG(charsetList);
372 NS_ASSERTION(charsetList[0], "charsetList should not be empty");
373 if (!charsetList[0])
374 return NS_ERROR_INVALID_ARG;
376 if (mCharsetListIndex >= 0) {
377 mCharsetList.Clear();
378 mCharsetListIndex = -1;
381 mCharsetList.ParseString(charsetList, ", ");
383 return NS_OK;
386 const char * nsSaveAsCharset::GetNextCharset()
388 if ((mCharsetListIndex + 1) >= mCharsetList.Count())
389 return nsnull;
391 // bump the index and return the next charset
392 return mCharsetList[++mCharsetListIndex]->get();
395 /////////////////////////////////////////////////////////////////////////////////////////
397 nsresult
398 NS_NewSaveAsCharset(nsISupports **inst)
400 if(nsnull == inst )
401 return NS_ERROR_NULL_POINTER;
402 *inst = (nsISupports *) new nsSaveAsCharset;
403 if(*inst)
404 NS_ADDREF(*inst);
405 return (*inst) ? NS_OK : NS_ERROR_OUT_OF_MEMORY;