1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
23 * Pierre Phaneuf <pp@ludusdesign.com>
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #include "nsIUnicodeEncoder.h"
40 #include "nsICharsetConverterManager.h"
41 #include "nsReadableUtils.h"
42 #include "nsITextToSubURI.h"
43 #include "nsIServiceManager.h"
44 #include "nsUConvDll.h"
47 #include "nsTextToSubURI.h"
// File-local class ID constant for the charset converter manager. It is the
// identifier passed to CallGetService() / do_GetService() further down in
// this file when a converter manager instance is requested.
50 static NS_DEFINE_CID(kCharsetConverterManagerCID
, NS_ICHARSETCONVERTERMANAGER_CID
);
// Constructor and destructor of nsTextToSubURI.
// NOTE(review): neither body is visible in this excerpt, so nothing can be
// said here about what (if anything) they initialize or release.
52 nsTextToSubURI::nsTextToSubURI()
55 nsTextToSubURI::~nsTextToSubURI()
// Declares, through the NS_IMPL_ISUPPORTS1 macro, that nsTextToSubURI
// implements exactly one interface: nsITextToSubURI.
// NOTE(review): the macro expansion itself lives outside this file.
59 NS_IMPL_ISUPPORTS1(nsTextToSubURI
, nsITextToSubURI
)
// Encodes the UTF-16 string |text| into |charset| and URL-escapes the bytes.
//
// Steps visible in this block:
//   1. obtain the charset converter manager and, from it, an encoder for
//      |charset|;
//   2. convert |text| into a byte buffer, letting the encoder append any
//      trailing bytes through Finish();
//   3. pass the buffer to nsEscape(..., url_XPAlphas) and store the result
//      in *_retval.
//
// Parameters:
//   charset  - name of the target charset, handed to GetUnicodeEncoder().
//   text     - input string; its length is taken with nsCRT::strlen().
//   _retval  - out parameter receiving the string returned by nsEscape().
//
// Returns the nsresult of the first failing call, or NS_ERROR_OUT_OF_MEMORY
// when nsEscape() returns null.
61 NS_IMETHODIMP
nsTextToSubURI::ConvertAndEscape(
62 const char *charset
, const PRUnichar
*text
, char **_retval
)
// NOTE(review): the condition guarding this early return is not visible in
// this excerpt; presumably it is a null check on _retval, mirroring
// UnEscapeAndConvert -- confirm against the full file.
65 return NS_ERROR_NULL_POINTER
;
69 // Get Charset, get the encoder.
// NOTE(review): ccm and encoder are raw interface pointers and no release is
// visible in this excerpt -- verify that both are released on every path.
70 nsICharsetConverterManager
*ccm
;
71 rv
= CallGetService(kCharsetConverterManagerCID
, &ccm
);
72 if(NS_SUCCEEDED(rv
)) {
73 nsIUnicodeEncoder
*encoder
;
74 rv
= ccm
->GetUnicodeEncoder(charset
, &encoder
);
76 if (NS_SUCCEEDED(rv
)) {
// Select the kOnError_Replace error behavior, with '?' as the character
// argument and no custom encoder (nsnull).
77 rv
= encoder
->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace
, nsnull
, (PRUnichar
)'?');
82 PRInt32 ulen
= nsCRT::strlen(text
);
// Ask the encoder for an output size (outlen) for ulen input characters.
84 if(NS_SUCCEEDED(rv
= encoder
->GetMaxLength(text
, ulen
, &outlen
)))
// One extra byte is requested beyond outlen.
// NOTE(review): the declaration of pBuf, any condition guarding this
// allocation, its null check and the matching free are not visible here.
87 pBuf
= (char*)PR_Malloc(outlen
+1);
// Remember the buffer capacity: Convert() receives &outlen and the value is
// read back afterwards as the number of bytes produced.
93 PRInt32 bufLen
= outlen
;
94 if(NS_SUCCEEDED(rv
= encoder
->Convert(text
,&ulen
, pBuf
, &outlen
))) {
95 // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary
// finLen is the space left in the buffer after the converted output;
// Finish() writes at pBuf+outlen and receives &finLen.
96 PRInt32 finLen
= bufLen
- outlen
;
98 if (NS_SUCCEEDED(encoder
->Finish((char *)(pBuf
+outlen
), &finLen
)))
// NOTE(review): nsEscape() takes pBuf without a length, so the buffer must be
// NUL-terminated first; that step is not visible in this excerpt.
102 *_retval
= nsEscape(pBuf
, url_XPAlphas
);
103 if(nsnull
== *_retval
)
104 rv
= NS_ERROR_OUT_OF_MEMORY
;
// URL-unescapes |text| and decodes the resulting bytes from |charset| into a
// UTF-16 buffer.
//
// Parameters:
//   charset  - name of the source charset, handed to GetUnicodeDecoder().
//   text     - escaped input; it is duplicated before being unescaped.
//   _retval  - out parameter; must not be null.
//
// Returns NS_ERROR_NULL_POINTER when _retval is null, NS_ERROR_OUT_OF_MEMORY
// when the duplicate of |text| cannot be allocated, otherwise the nsresult of
// the service lookup / decoder calls.
// NOTE(review): the code that stores the converted buffer into *_retval and
// frees |unescaped| / releases |decoder| is not visible in this excerpt.
117 NS_IMETHODIMP
nsTextToSubURI::UnEscapeAndConvert(
118 const char *charset
, const char *text
, PRUnichar
**_retval
)
120 if(nsnull
== _retval
)
121 return NS_ERROR_NULL_POINTER
;
125 // unescape the string, unescape changes the input
// Work on a private copy so that the caller's |text| is left untouched.
126 char *unescaped
= nsCRT::strdup((char *) text
);
127 if (nsnull
== unescaped
)
128 return NS_ERROR_OUT_OF_MEMORY
;
129 unescaped
= nsUnescape(unescaped
);
130 NS_ASSERTION(unescaped
, "nsUnescape returned null");
132 // Convert from the charset to unicode
133 nsCOMPtr
<nsICharsetConverterManager
> ccm
=
134 do_GetService(kCharsetConverterManagerCID
, &rv
);
135 if (NS_SUCCEEDED(rv
)) {
// NOTE(review): decoder is a raw interface pointer; confirm it is released.
136 nsIUnicodeDecoder
*decoder
;
137 rv
= ccm
->GetUnicodeDecoder(charset
, &decoder
);
138 if (NS_SUCCEEDED(rv
)) {
139 PRUnichar
*pBuf
= nsnull
;
140 PRInt32 len
= strlen(unescaped
);
// Ask the decoder for an output size (outlen) for len input bytes.
142 if (NS_SUCCEEDED(rv
= decoder
->GetMaxLength(unescaped
, len
, &outlen
))) {
// NOTE(review): the element size used here is sizeof(PRUnichar*) (a pointer),
// although pBuf holds PRUnichar elements. This looks like it should be
// sizeof(PRUnichar); as written it only over-allocates. Confirm and fix.
143 pBuf
= (PRUnichar
*) PR_Malloc((outlen
+1)*sizeof(PRUnichar
*));
// NOTE(review): presumably executed only when pBuf is null; the guarding
// condition is not visible in this excerpt.
145 rv
= NS_ERROR_OUT_OF_MEMORY
;
147 if (NS_SUCCEEDED(rv
= decoder
->Convert(unescaped
, &len
, pBuf
, &outlen
))) {
// File-local predicate on a charset name. The visible condition matches:
//   - any name beginning with "ISO-2022-" (case-insensitive prefix compare;
//     sizeof("ISO-2022-")-1 is the prefix length without the terminating NUL),
//   - exactly "UTF-7" (case-insensitive),
//   - exactly "HZ-GB-2312" (case-insensitive).
// NOTE(review): the return statements are not visible in this excerpt;
// presumably a match yields a true value, as the caller's variable name
// isStatefulCharset suggests -- confirm against the full file.
163 static PRBool
statefulCharset(const char *charset
)
165 if (!nsCRT::strncasecmp(charset
, "ISO-2022-", sizeof("ISO-2022-")-1) ||
166 !nsCRT::strcasecmp(charset
, "UTF-7") ||
167 !nsCRT::strcasecmp(charset
, "HZ-GB-2312"))
// Converts the byte string aURI, interpreted in charset aCharset, to UTF-16
// and places the result in _retval.
//
// Visible flow:
//   - when aCharset is not reported as stateful and aURI is pure ASCII, the
//     input is copied with CopyASCIItoUTF16();
//   - when aCharset is not reported as stateful and aIRI is set, the input is
//     copied with CopyUTF8toUTF16();
//   - otherwise an empty aCharset is rejected with NS_ERROR_INVALID_ARG and a
//     decoder obtained for aCharset performs the conversion.
//
// Returns NS_ERROR_OUT_OF_MEMORY when the temporary buffer cannot be
// allocated, or the failing nsresult of the service / decoder calls.
// NOTE(review): the remaining parameters (aIRI, _retval), the early returns
// inside the fast paths and the final return are not visible in this excerpt.
173 nsresult
nsTextToSubURI::convertURItoUnicode(const nsAFlatCString
&aCharset
,
174 const nsAFlatCString
&aURI
,
180 // check for 7bit encoding the data may not be ASCII after we decode
181 PRBool isStatefulCharset
= statefulCharset(aCharset
.get());
183 if (!isStatefulCharset
&& IsASCII(aURI
)) {
184 CopyASCIItoUTF16(aURI
, _retval
);
// NOTE(review): the comment below ("aURI turns out not to be UTF-8") implies
// a UTF-8 validity check guards this copy; that check is not visible here.
188 if (!isStatefulCharset
&& aIRI
) {
190 CopyUTF8toUTF16(aURI
, _retval
);
195 // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
196 NS_ENSURE_FALSE(aCharset
.IsEmpty(), NS_ERROR_INVALID_ARG
);
198 nsCOMPtr
<nsICharsetConverterManager
> charsetConverterManager
;
// Here the manager is looked up by contract ID, whereas the other methods in
// this file use kCharsetConverterManagerCID.
200 charsetConverterManager
= do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID
, &rv
);
201 NS_ENSURE_SUCCESS(rv
, rv
);
203 nsCOMPtr
<nsIUnicodeDecoder
> unicodeDecoder
;
204 rv
= charsetConverterManager
->GetUnicodeDecoder(aCharset
.get(),
205 getter_AddRefs(unicodeDecoder
));
206 NS_ENSURE_SUCCESS(rv
, rv
);
208 PRInt32 srcLen
= aURI
.Length();
// Ask the decoder for an output size (dstLen) for srcLen input bytes.
210 rv
= unicodeDecoder
->GetMaxLength(aURI
.get(), srcLen
, &dstLen
);
211 NS_ENSURE_SUCCESS(rv
, rv
);
// Temporary buffer of dstLen PRUnichar elements; it is freed below after the
// result has been assigned to _retval.
213 PRUnichar
*ustr
= (PRUnichar
*) nsMemory::Alloc(dstLen
* sizeof(PRUnichar
));
214 NS_ENSURE_TRUE(ustr
, NS_ERROR_OUT_OF_MEMORY
);
// Convert() receives &srcLen and &dstLen; dstLen is then used as the length
// of the converted output.
216 rv
= unicodeDecoder
->Convert(aURI
.get(), &srcLen
, ustr
, &dstLen
);
// _retval is only assigned when the conversion succeeded.
218 if (NS_SUCCEEDED(rv
))
219 _retval
.Assign(ustr
, dstLen
);
221 nsMemory::Free(ustr
);
// Unescapes aURIFragment and converts it from aCharset to UTF-16 in _retval.
//
// The fragment is unescaped with NS_UnescapeURL() using
// esc_SkipControl | esc_AlwaysCopy, then passed to convertURItoUnicode() with
// PR_TRUE as the third argument. When that conversion is treated as failed,
// _retval is filled from the original (still escaped) aURIFragment with
// CopyUTF8toUTF16().
// NOTE(review): the _retval parameter declaration, the comparison closing the
// `if` (described in the comment as "!= NS_OK") and the return statement are
// not visible in this excerpt.
226 NS_IMETHODIMP
nsTextToSubURI::UnEscapeURIForUI(const nsACString
& aCharset
,
227 const nsACString
&aURIFragment
,
230 nsCAutoString unescapedSpec
;
231 // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
232 NS_UnescapeURL(PromiseFlatCString(aURIFragment
),
233 esc_SkipControl
| esc_AlwaysCopy
, unescapedSpec
);
235 // in case of failure, return escaped URI
236 // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
237 // sequences are also considered failure in this context
238 if (convertURItoUnicode(
239 PromiseFlatCString(aCharset
), unescapedSpec
, PR_TRUE
, _retval
)
241 // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8
242 CopyUTF8toUTF16(aURIFragment
, _retval
);
// Unescapes aURIFragment with the flags esc_AlwaysCopy | esc_OnlyNonASCII and
// converts the result from aCharset to UTF-16 in _retval.
//
// Unlike UnEscapeURIForUI, the nsresult of convertURItoUnicode() is returned
// directly; no fallback copy is performed in this block.
// NOTE(review): the _retval parameter declaration is not visible in this
// excerpt.
246 NS_IMETHODIMP
nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString
& aCharset
,
247 const nsACString
&aURIFragment
,
250 nsCAutoString unescapedSpec
;
251 NS_UnescapeURL(PromiseFlatCString(aURIFragment
),
252 esc_AlwaysCopy
| esc_OnlyNonASCII
, unescapedSpec
);
// PR_TRUE is passed as the third argument, the same value UnEscapeURIForUI
// passes.
254 return convertURItoUnicode(PromiseFlatCString(aCharset
), unescapedSpec
, PR_TRUE
, _retval
);
257 //----------------------------------------------------------------------