1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 2002
20 * the Initial Developer. All Rights Reserved.
23 * Naoki Hotta <nhotta@netscape.com> (original author)
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #include "nsIDNService.h"
40 #include "nsReadableUtils.h"
42 #include "nsUnicharUtils.h"
43 #include "nsIServiceManager.h"
44 #include "nsIPrefService.h"
45 #include "nsIPrefBranch.h"
46 #include "nsIPrefBranch2.h"
47 #include "nsIObserverService.h"
48 #include "nsISupportsPrimitives.h"
51 //-----------------------------------------------------------------------------
52 // RFC 1034 - 3.1. Name space specifications and terminology
// Maximum length of one DNS label ("node"). The code in this file uses it to
// size per-label work buffers and to reject labels that are too long.
53 static const PRUint32 kMaxDNSNodeLen
= 63;
55 //-----------------------------------------------------------------------------
// Preference names used by this file:
//   IDNTESTBED   - boolean, read into mMultilingualTestBed by prefsChanged().
//   IDNPREFIX    - string, copied into mACEPrefix by prefsChanged() when it is
//                  no longer than kACEPrefixLen.
//   IDNBLACKLIST - string, read into mIDNBlacklist by prefsChanged().
//   SHOWPUNYCODE - boolean, read by prefsChanged().
//   IDNWHITELIST - root of the pref branch that Init() stores in
//                  mIDNWhitelistPrefBranch.
57 #define NS_NET_PREF_IDNTESTBED "network.IDN_testbed"
58 #define NS_NET_PREF_IDNPREFIX "network.IDN_prefix"
59 #define NS_NET_PREF_IDNBLACKLIST "network.IDN.blacklist_chars"
60 #define NS_NET_PREF_SHOWPUNYCODE "network.IDN_show_punycode"
61 #define NS_NET_PREF_IDNWHITELIST "network.IDN.whitelist."
63 inline PRBool
isOnlySafeChars(const nsAFlatString
& in
,
64 const nsAFlatString
& blacklist
)
66 return (blacklist
.IsEmpty() ||
67 in
.FindCharInSet(blacklist
) == kNotFound
);
70 //-----------------------------------------------------------------------------
72 //-----------------------------------------------------------------------------
74 /* Implementation file */
// Interface-map declaration for nsIDNService, using the thread-safe flavour
// of the macro.
// NOTE(review): the macro name implies three interfaces, but only
// nsISupportsWeakReference is visible in this listing -- confirm the others.
75 NS_IMPL_THREADSAFE_ISUPPORTS3(nsIDNService
,
78 nsISupportsWeakReference
)
// Init: looks up the preference service, stores the whitelist pref branch in
// mIDNWhitelistPrefBranch, registers this object as an observer of the four
// IDN preferences and finally loads their current values via prefsChanged().
80 nsresult
nsIDNService::Init()
82 nsCOMPtr
<nsIPrefService
> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID
));
// Branch rooted at NS_NET_PREF_IDNWHITELIST, kept in a member for later use.
84 prefs
->GetBranch(NS_NET_PREF_IDNWHITELIST
, getter_AddRefs(mIDNWhitelistPrefBranch
));
// The observers are added through the nsIPrefBranch2 view of the same object.
86 nsCOMPtr
<nsIPrefBranch2
> prefInternal(do_QueryInterface(prefs
));
// Every AddObserver() call passes PR_TRUE as its third argument.
// NOTE(review): presumably this asks for a weak reference -- confirm against
// the nsIPrefBranch2 interface.
88 prefInternal
->AddObserver(NS_NET_PREF_IDNTESTBED
, this, PR_TRUE
);
89 prefInternal
->AddObserver(NS_NET_PREF_IDNPREFIX
, this, PR_TRUE
);
90 prefInternal
->AddObserver(NS_NET_PREF_IDNBLACKLIST
, this, PR_TRUE
);
91 prefInternal
->AddObserver(NS_NET_PREF_SHOWPUNYCODE
, this, PR_TRUE
);
// A null pref name makes prefsChanged() process every preference section.
92 prefsChanged(prefInternal
, nsnull
);
// Observe: observer callback. When aTopic equals
// NS_PREFBRANCH_PREFCHANGE_TOPIC_ID, aSubject is queried for nsIPrefBranch
// and passed to prefsChanged() together with aData, which prefsChanged()
// treats as the name of the preference.
// NOTE(review): the aTopic parameter declaration and the return statement are
// not visible in this listing.
98 NS_IMETHODIMP
nsIDNService::Observe(nsISupports
*aSubject
,
100 const PRUnichar
*aData
)
102 if (!strcmp(aTopic
, NS_PREFBRANCH_PREFCHANGE_TOPIC_ID
)) {
103 nsCOMPtr
<nsIPrefBranch
> prefBranch( do_QueryInterface(aSubject
) );
105 prefsChanged(prefBranch
, aData
);
// prefsChanged: refreshes the settings cached from prefBranch.
//   prefBranch - branch the values are read from.
//   pref       - name of the preference that changed, or null.
// Each section below runs when |pref| is null or equals that section's
// preference name, so a null |pref| processes all of them.
110 void nsIDNService::prefsChanged(nsIPrefBranch
*prefBranch
, const PRUnichar
*pref
)
// Multilingual test bed flag: only updated when the boolean can be read.
112 if (!pref
|| NS_LITERAL_STRING(NS_NET_PREF_IDNTESTBED
).Equals(pref
)) {
114 if (NS_SUCCEEDED(prefBranch
->GetBoolPref(NS_NET_PREF_IDNTESTBED
, &val
)))
115 mMultilingualTestBed
= val
;
// ACE prefix: replaced only when the pref was read successfully and is no
// longer than kACEPrefixLen; the copy is limited to kACEPrefixLen + 1 bytes.
117 if (!pref
|| NS_LITERAL_STRING(NS_NET_PREF_IDNPREFIX
).Equals(pref
)) {
118 nsXPIDLCString prefix
;
119 nsresult rv
= prefBranch
->GetCharPref(NS_NET_PREF_IDNPREFIX
, getter_Copies(prefix
));
120 if (NS_SUCCEEDED(rv
) && prefix
.Length() <= kACEPrefixLen
)
121 PL_strncpyz(nsIDNService::mACEPrefix
, prefix
.get(), kACEPrefixLen
+ 1);
// Character blacklist: fetched as an nsISupportsString complex value and
// copied into mIDNBlacklist on success.
123 if (!pref
|| NS_LITERAL_STRING(NS_NET_PREF_IDNBLACKLIST
).Equals(pref
)) {
124 nsCOMPtr
<nsISupportsString
> blacklist
;
125 nsresult rv
= prefBranch
->GetComplexValue(NS_NET_PREF_IDNBLACKLIST
,
126 NS_GET_IID(nsISupportsString
),
127 getter_AddRefs(blacklist
));
128 if (NS_SUCCEEDED(rv
))
129 blacklist
->ToString(getter_Copies(mIDNBlacklist
));
// NOTE(review): the line between ToString() and Truncate() is not visible in
// this listing; Truncate() presumably belongs to the failure path -- confirm.
131 mIDNBlacklist
.Truncate();
// Show-punycode flag.
// NOTE(review): the statement that consumes |val| is not visible here.
133 if (!pref
|| NS_LITERAL_STRING(NS_NET_PREF_SHOWPUNYCODE
).Equals(pref
)) {
135 if (NS_SUCCEEDED(prefBranch
->GetBoolPref(NS_NET_PREF_SHOWPUNYCODE
, &val
)))
// Constructor: installs the default ACE prefix, switches the multilingual
// test bed off, creates the nameprep handle and fetches the Unicode
// normalizer service.
140 nsIDNService::nsIDNService()
142 // initialize to the official prefix (RFC 3490 "5. ACE prefix")
143 const char kIDNSPrefix
[] = "xn--";
144 strcpy(mACEPrefix
, kIDNSPrefix
);
146 mMultilingualTestBed
= PR_FALSE
;
// If the nameprep handle cannot be created it is reset to null; stringPrep()
// returns an error while the handle (or the normalizer) is null.
148 if (idn_success
!= idn_nameprep_create(NULL
, &mNamePrepHandle
))
149 mNamePrepHandle
= nsnull
;
151 mNormalizer
= do_GetService(NS_UNICODE_NORMALIZER_CONTRACTID
);
152 /* member initializers and constructor code */
155 nsIDNService::~nsIDNService()
157 idn_nameprep_destroy(mNamePrepHandle
);
160 /* ACString ConvertUTF8toACE (in AUTF8String input); */
// Converts a UTF-8 host name to its ACE form.
//   input - host name in UTF-8.
//   ace   - receives the converted labels.
// The input is widened to UTF-16, its full stops are normalized, and each
// '.'-terminated label is passed through stringPrepAndACE() and appended to
// |ace|. A failing label ends the conversion with that label's error code.
// NOTE(review): the declaration of |rv| and the statements that maintain
// |len| and |offset| are not visible in this listing.
161 NS_IMETHODIMP
nsIDNService::ConvertUTF8toACE(const nsACString
& input
, nsACString
& ace
)
164 NS_ConvertUTF8toUTF16
ustr(input
);
166 // map ideographic period to ASCII period etc.
167 normalizeFullStops(ustr
);
170 PRUint32 len
, offset
;
173 nsCAutoString encodedBuf
;
175 nsAString::const_iterator start
, end
;
176 ustr
.BeginReading(start
);
177 ustr
.EndReading(end
);
180 // encode nodes if non ASCII
181 while (start
!= end
) {
// A '.' ends the current label; the label is taken as the len - 1 characters
// starting at |offset|.
183 if (*start
++ == (PRUnichar
)'.') {
184 rv
= stringPrepAndACE(Substring(ustr
, offset
, len
- 1), encodedBuf
);
185 NS_ENSURE_SUCCESS(rv
, rv
);
187 ace
.Append(encodedBuf
);
194 // add extra node for multilingual test bed
195 if (mMultilingualTestBed
)
196 ace
.AppendLiteral("mltbd.");
197 // encode the last node if non ASCII
199 rv
= stringPrepAndACE(Substring(ustr
, offset
, len
), encodedBuf
);
200 NS_ENSURE_SUCCESS(rv
, rv
);
202 ace
.Append(encodedBuf
);
208 /* AUTF8String convertACEtoUTF8(in ACString input); */
// Converts an ACE host name to UTF-8, label by label, using decodeACE().
//   input   - host name to decode.
//   _retval - receives the decoded name, or a copy of |input| when the input
//             is not ASCII or a label cannot be decoded.
// NOTE(review): the statements that maintain |len| and |offset|, the
// separator handling and the return statements are not visible in this
// listing.
209 NS_IMETHODIMP
nsIDNService::ConvertACEtoUTF8(const nsACString
& input
, nsACString
& _retval
)
211 // RFC 3490 - 4.2 ToUnicode
212 // ToUnicode never fails. If any step fails, then the original input
213 // sequence is returned immediately in that step.
// Non-ASCII input cannot be ACE, so it is handed back unchanged.
215 if (!IsASCII(input
)) {
216 _retval
.Assign(input
);
220 PRUint32 len
= 0, offset
= 0;
221 nsCAutoString decodedBuf
;
223 nsACString::const_iterator start
, end
;
224 input
.BeginReading(start
);
225 input
.EndReading(end
);
228 // loop and decode nodes
229 while (start
!= end
) {
231 if (*start
++ == '.') {
// A label that fails to decode makes the whole result fall back to |input|.
232 if (NS_FAILED(decodeACE(Substring(input
, offset
, len
- 1), decodedBuf
))) {
233 _retval
.Assign(input
);
237 _retval
.Append(decodedBuf
);
243 // decode the last node
// NOTE(review): the line between Assign() and Append() below is not visible;
// presumably an else separates the failure and success paths -- confirm.
245 if (NS_FAILED(decodeACE(Substring(input
, offset
, len
), decodedBuf
)))
246 _retval
.Assign(input
);
248 _retval
.Append(decodedBuf
);
254 /* boolean isACE(in ACString input); */
255 NS_IMETHODIMP
nsIDNService::IsACE(const nsACString
& input
, PRBool
*_retval
)
257 nsACString::const_iterator begin
;
258 input
.BeginReading(begin
);
260 const char *data
= begin
.get();
261 PRUint32 dataLen
= begin
.size_forward();
263 // look for the ACE prefix in the input string. it may occur
264 // at the beginning of any segment in the domain name. for
265 // example: "www.xn--ENCODED.com"
267 const char *p
= PL_strncasestr(data
, mACEPrefix
, dataLen
);
269 *_retval
= p
&& (p
== data
|| *(p
- 1) == '.');
273 /* AUTF8String normalize(in AUTF8String input); */
// Normalizes a UTF-8 host name label by label with stringPrep().
//   input  - host name; must be valid UTF-8, otherwise NS_ERROR_UNEXPECTED.
//   output - receives the normalized name in UTF-8, or its ACE form when the
//            normalized name contains a blacklisted character.
// A label that stringPrep() rejects ends the call with that error code.
// NOTE(review): the declaration of |rv|, the statements that maintain |len|
// and |offset|, and the final return are not visible in this listing.
274 NS_IMETHODIMP
nsIDNService::Normalize(const nsACString
& input
, nsACString
& output
)
276 // protect against bogus input
277 NS_ENSURE_TRUE(IsUTF8(input
), NS_ERROR_UNEXPECTED
);
279 NS_ConvertUTF8toUTF16
inUTF16(input
);
280 normalizeFullStops(inUTF16
);
282 // pass the domain name to stringprep label by label
283 nsAutoString outUTF16
, outLabel
;
285 PRUint32 len
= 0, offset
= 0;
287 nsAString::const_iterator start
, end
;
288 inUTF16
.BeginReading(start
);
289 inUTF16
.EndReading(end
);
291 while (start
!= end
) {
// A '.' ends the current label; the prepared label and a '.' are appended.
293 if (*start
++ == PRUnichar('.')) {
294 rv
= stringPrep(Substring(inUTF16
, offset
, len
- 1), outLabel
);
295 NS_ENSURE_SUCCESS(rv
, rv
);
297 outUTF16
.Append(outLabel
);
298 outUTF16
.Append(PRUnichar('.'));
// Last label (no trailing '.').
304 rv
= stringPrep(Substring(inUTF16
, offset
, len
), outLabel
);
305 NS_ENSURE_SUCCESS(rv
, rv
);
307 outUTF16
.Append(outLabel
);
310 CopyUTF16toUTF8(outUTF16
, output
);
// Names containing blacklisted characters are returned ACE encoded. Note
// that |output| is passed as both the source and the destination.
311 if (!isOnlySafeChars(outUTF16
, mIDNBlacklist
))
312 return ConvertUTF8toACE(output
, output
);
// Produces the form of a host name that should be displayed.
//   input    - host name, either ASCII (possibly ACE) or UTF-8.
//   _isASCII - set to whether the returned string is ASCII on the paths
//              visible below.
//   _retval  - receives the display form.
// NOTE(review): several lines of this function (local declarations, the
// initial copy into _retval, returns and closing braces) are not visible in
// this listing.
317 NS_IMETHODIMP
nsIDNService::ConvertToDisplayIDN(const nsACString
& input
, PRBool
* _isASCII
, nsACString
& _retval
)
319 // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
320 // Else, if host is already UTF-8, then make sure it is normalized per IDN.
324 if (IsASCII(input
)) {
325 // first, canonicalize the host to lowercase, for whitelist lookup
327 ToLowerCase(_retval
);
330 IsACE(_retval
, &isACE
);
// Decode only when the host is ACE, punycode display is not forced and the
// host passes the whitelist check.
332 if (isACE
&& !mShowPunycode
&& isInWhitelist(_retval
)) {
333 // ConvertACEtoUTF8() can't fail, but might return the original ACE string
334 nsCAutoString
temp(_retval
);
335 ConvertACEtoUTF8(temp
, _retval
);
336 *_isASCII
= IsASCII(_retval
);
341 // We have to normalize the hostname before testing against the domain
342 // whitelist (see bug 315411), and to ensure the entire string gets
// normalized.
344 rv
= Normalize(input
, _retval
);
345 if (NS_FAILED(rv
)) return rv
;
// With the show-punycode setting on, the normalized name is ACE encoded in
// place (|_retval| is both source and destination).
347 if (mShowPunycode
&& NS_SUCCEEDED(ConvertUTF8toACE(_retval
, _retval
))) {
352 // normalization could result in an ASCII-only hostname. alternatively, if
353 // the host is converted to ACE by the normalizer, then the host may contain
354 // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694, and bug 309311.
355 *_isASCII
= IsASCII(_retval
);
// A non-ASCII result whose host is not whitelisted is returned ACE encoded.
356 if (!*_isASCII
&& !isInWhitelist(_retval
)) {
358 return ConvertUTF8toACE(_retval
, _retval
);
365 //-----------------------------------------------------------------------------
// Converts UTF-16 text to UCS-4 code points.
//   in        - UTF-16 input.
//   out       - destination array of code points.
//   outBufLen - capacity limit applied to the write index.
//   outLen    - receives the number of code points written on the
//               truncation path visible below.
// A high surrogate followed by a low surrogate is combined into one code
// point with SURROGATE_TO_UCS4.
// NOTE(review): the declarations of |i| and |curChar|, the non-surrogate
// branch, the end-iterator setup and the normal-path assignment to *outLen
// are not visible in this listing.
367 static void utf16ToUcs4(const nsAString
& in
, PRUint32
*out
, PRUint32 outBufLen
, PRUint32
*outLen
)
370 nsAString::const_iterator start
, end
;
371 in
.BeginReading(start
);
374 while (start
!= end
) {
380 NS_IS_HIGH_SURROGATE(curChar
) &&
381 NS_IS_LOW_SURROGATE(*start
)) {
382 out
[i
] = SURROGATE_TO_UCS4(curChar
, *start
);
// Out of room: report the error, zero-terminate at the last slot and report
// outBufLen - 1 code points.
389 if (i
>= outBufLen
) {
390 NS_ERROR("input too big, the result truncated");
391 out
[outBufLen
-1] = (PRUint32
)'\0';
392 *outLen
= outBufLen
-1;
// Zero-terminate the output.
396 out
[i
] = (PRUint32
)'\0';
// Appends the UTF-16 encoding of UCS-4 code points to |out|.
//   in  - code points to convert.
//   out - string the UTF-16 units are appended to.
// Code points outside the BMP are written as a high/low surrogate pair;
// everything else is appended as a single unit.
// NOTE(review): the loop over |in| is not visible in this listing; presumably
// it stops at a zero code point, since the callers in this file pass
// zero-terminated buffers -- confirm.
400 static void ucs4toUtf16(const PRUint32
*in
, nsAString
& out
)
403 if (!IS_IN_BMP(*in
)) {
404 out
.Append((PRUnichar
) H_SURROGATE(*in
));
405 out
.Append((PRUnichar
) L_SURROGATE(*in
));
408 out
.Append((PRUnichar
) *in
);
// Punycode-encodes one label and prepends the ACE prefix.
//   prefix - ACE prefix placed in front of the encoded text.
//   in     - label in UTF-16.
//   out    - assigned prefix + encoded label on success.
// Returns NS_ERROR_FAILURE when punycode_encode() does not report success or
// when the encoded length leaves no room for the terminator.
// NOTE(review): the declaration of |ucs4Len|, the remaining arguments of
// punycode_encode() and the final return are not visible in this listing.
413 static nsresult
punycode(const char* prefix
, const nsAString
& in
, nsACString
& out
)
// Work buffer for the label as UCS-4; one extra slot for the terminator.
415 PRUint32 ucs4Buf
[kMaxDNSNodeLen
+ 1];
417 utf16ToUcs4(in
, ucs4Buf
, kMaxDNSNodeLen
, &ucs4Len
);
419 // need maximum 20 bits to encode 16 bit Unicode character
420 // (include null terminator)
421 const PRUint32 kEncodedBufSize
= kMaxDNSNodeLen
* 20 / 8 + 1 + 1;
422 char encodedBuf
[kEncodedBufSize
];
423 punycode_uint encodedLength
= kEncodedBufSize
;
425 enum punycode_status status
= punycode_encode(ucs4Len
,
// encodedLength must stay below the buffer size so that the terminator
// written below is in bounds.
431 if (punycode_success
!= status
||
432 encodedLength
>= kEncodedBufSize
)
433 return NS_ERROR_FAILURE
;
435 encodedBuf
[encodedLength
] = '\0';
436 out
.Assign(nsDependentCString(prefix
) + nsDependentCString(encodedBuf
));
// RACE-encodes one label.
//   prefix - ACE prefix for the encoded label.
//   in     - label in UTF-16.
//   out    - the encoded text is appended to it.
// Returns NS_ERROR_FAILURE when race_compress_encode() fails.
// NOTE(review): the loop body that copies |in| into |temp|, the end-iterator
// setup, the use of |prefix| and the final return are not visible in this
// listing.
441 static nsresult
encodeToRACE(const char* prefix
, const nsAString
& in
, nsACString
& out
)
443 // need maximum 20 bits to encode 16 bit Unicode character
444 // (include null terminator)
445 const PRUint32 kEncodedBufSize
= kMaxDNSNodeLen
* 20 / 8 + 1 + 1;
447 // set up a work buffer for RACE encoder
// NOTE(review): temp holds kMaxDNSNodeLen + 2 units and is indexed with
// in.Length() + 1 without a length check in this function, so it relies on
// callers limiting the label length -- confirm every caller does.
448 PRUnichar temp
[kMaxDNSNodeLen
+ 2];
449 temp
[0] = 0xFFFF; // set a place holder (to be filled by get_compress_mode)
450 temp
[in
.Length() + 1] = (PRUnichar
)'\0';
452 nsAString::const_iterator start
, end
;
453 in
.BeginReading(start
);
// Index 0 is the placeholder, so the copy starts at index 1.
456 for (PRUint32 i
= 1; start
!= end
; i
++)
459 // encode nodes if non ASCII
461 char encodedBuf
[kEncodedBufSize
];
// The compress mode is computed from the text after the placeholder slot.
462 idn_result_t result
= race_compress_encode((const unsigned short *) temp
,
463 get_compress_mode((unsigned short *) temp
+ 1),
464 encodedBuf
, kEncodedBufSize
);
465 if (idn_success
!= result
)
466 return NS_ERROR_FAILURE
;
469 out
.Append(encodedBuf
);
476 // 1) Map -- For each character in the input, check if it has a mapping
477 // and, if so, replace it with its mapping. This is described in section 3.
479 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
480 // normalization. This is described in section 4.
482 // 3) Prohibit -- Check for any characters that are not allowed in the
483 // output. If any are found, return an error. This is described in section 5.
486 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
487 // are found, make sure that the whole string satisfies the requirements
488 // for bidirectional strings. If the string does not satisfy the requirements
489 // for bidirectional strings, return an error. This is described in section 6.
//
// stringPrep: applies the four steps above to one label.
//   in  - label in UTF-16.
//   out - assigned the mapped and NFKC-normalized label on success.
// Returns NS_ERROR_FAILURE when the nameprep handle or the normalizer is
// missing, when mapping fails, when the mapped or normalized text reaches
// kMaxDNSNodeLen characters, or when the prohibit or bidi check fails.
// NOTE(review): the declarations of |rv| and |ucs4Len| and the final return
// are not visible in this listing.
491 nsresult
nsIDNService::stringPrep(const nsAString
& in
, nsAString
& out
)
493 if (!mNamePrepHandle
|| !mNormalizer
)
494 return NS_ERROR_FAILURE
;
497 PRUint32 ucs4Buf
[kMaxDNSNodeLen
+ 1];
499 utf16ToUcs4(in
, ucs4Buf
, kMaxDNSNodeLen
, &ucs4Len
);
// Step 1: map.
502 idn_result_t idn_err
;
504 PRUint32 namePrepBuf
[kMaxDNSNodeLen
* 3]; // map up to three characters
505 idn_err
= idn_nameprep_map(mNamePrepHandle
, (const PRUint32
*) ucs4Buf
,
506 (PRUint32
*) namePrepBuf
, kMaxDNSNodeLen
* 3);
507 NS_ENSURE_TRUE(idn_err
== idn_success
, NS_ERROR_FAILURE
);
509 nsAutoString namePrepStr
;
510 ucs4toUtf16(namePrepBuf
, namePrepStr
);
511 if (namePrepStr
.Length() >= kMaxDNSNodeLen
)
512 return NS_ERROR_FAILURE
;
// Step 2: normalize (NFKC).
// NOTE(review): |rv| from the normalizer is not tested in the visible lines
// before normlizedStr is used -- confirm it is checked or returned later.
515 nsAutoString normlizedStr
;
516 rv
= mNormalizer
->NormalizeUnicodeNFKC(namePrepStr
, normlizedStr
);
517 if (normlizedStr
.Length() >= kMaxDNSNodeLen
)
518 return NS_ERROR_FAILURE
;
// Step 3: prohibit.
// NOTE(review): this check and the bidi check below are given ucs4Buf, the
// unmapped input, not the mapped/normalized text that is returned in |out|.
// The step description above speaks of characters "not allowed in the
// output" -- confirm that checking the input is intended.
521 const PRUint32
*found
= nsnull
;
522 idn_err
= idn_nameprep_isprohibited(mNamePrepHandle
,
523 (const PRUint32
*) ucs4Buf
, &found
);
524 if (idn_err
!= idn_success
|| found
)
525 return NS_ERROR_FAILURE
;
// Step 4: check bidi.
528 idn_err
= idn_nameprep_isvalidbidi(mNamePrepHandle
,
529 (const PRUint32
*) ucs4Buf
, &found
);
530 if (idn_err
!= idn_success
|| found
)
531 return NS_ERROR_FAILURE
;
533 // set the result string
534 out
.Assign(normlizedStr
);
539 nsresult
nsIDNService::encodeToACE(const nsAString
& in
, nsACString
& out
)
541 // RACE encode is supported for existing testing environment
542 if (!strcmp("bq--", mACEPrefix
))
543 return encodeToRACE(mACEPrefix
, in
, out
);
546 return punycode(mACEPrefix
, in
, out
);
// Prepares one label with stringPrep() and produces its ASCII/ACE form.
//   in  - label in UTF-16; rejected when longer than kMaxDNSNodeLen.
//   out - receives the ASCII or ACE encoded label.
// Returns NS_ERROR_FAILURE when the input label or the encoded result is
// longer than kMaxDNSNodeLen.
// NOTE(review): the declaration of |rv|, the conditions and else branches
// that choose between the copy and encode paths, and the final return are
// not visible in this listing.
549 nsresult
nsIDNService::stringPrepAndACE(const nsAString
& in
, nsACString
& out
)
555 if (in
.Length() > kMaxDNSNodeLen
) {
556 NS_ERROR("IDN node too large");
557 return NS_ERROR_FAILURE
;
// Direct copy of the input label to ASCII.
561 LossyCopyUTF16toASCII(in
, out
);
563 nsAutoString strPrep
;
564 rv
= stringPrep(in
, strPrep
);
565 if (NS_SUCCEEDED(rv
)) {
// A prepared label that is pure ASCII is copied as-is.
566 if (IsASCII(strPrep
))
567 LossyCopyUTF16toASCII(strPrep
, out
);
569 rv
= encodeToACE(strPrep
, out
);
// The encoded label must also fit in one DNS label.
573 if (out
.Length() > kMaxDNSNodeLen
) {
574 NS_ERROR("IDN node too large");
575 return NS_ERROR_FAILURE
;
582 // 1) Whenever dots are used as label separators, the following characters
583 // MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
584 // stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full stop).
//
// normalizeFullStops: walks |s| and replaces a single character at |index|
// with an ASCII ".".
//   s - string modified in place.
// NOTE(review): the end-iterator setup, the declaration and upkeep of
// |index| and the test that selects which characters are replaced are not
// visible in this listing.
587 void nsIDNService::normalizeFullStops(nsAString
& s
)
589 nsAString::const_iterator start
, end
;
590 s
.BeginReading(start
);
594 while (start
!= end
) {
599 s
.Replace(index
, 1, NS_LITERAL_STRING("."));
// Decodes one ACE label to UTF-8.
//   in  - label to decode.
//   out - receives the decoded label in UTF-8.
// Returns NS_ERROR_FAILURE when punycode_decode() fails, when the decoded
// text contains a blacklisted character, or when re-encoding the result does
// not reproduce |in| (compared case-insensitively).
// NOTE(review): the lines before the RFC comment, the remaining arguments of
// punycode_decode(), the declarations of |utf16| and |ace| and the final
// return are not visible in this listing.
609 nsresult
nsIDNService::decodeACE(const nsACString
& in
, nsACString
& out
)
618 // RFC 3490 - 4.2 ToUnicode
619 // The ToUnicode output never contains more code points than its input.
// One slot per input character after the prefix, plus one extra.
// NOTE(review): |output| comes from new[]; no matching delete[] is visible
// in this listing -- confirm every exit path releases it.
620 punycode_uint output_length
= in
.Length() - kACEPrefixLen
+ 1;
621 punycode_uint
*output
= new punycode_uint
[output_length
];
622 NS_ENSURE_TRUE(output
, NS_ERROR_OUT_OF_MEMORY
);
// The prefix is skipped; only the text after it is decoded.
624 enum punycode_status status
= punycode_decode(in
.Length() - kACEPrefixLen
,
625 PromiseFlatCString(in
).get() + kACEPrefixLen
,
629 if (status
!= punycode_success
) {
631 return NS_ERROR_FAILURE
;
// NOTE(review): this index is in bounds only if punycode_decode() lowered
// output_length below the allocated element count -- confirm.
635 output
[output_length
] = 0;
637 ucs4toUtf16(output
, utf16
);
// Labels that decode to blacklisted characters are rejected.
639 if (!isOnlySafeChars(utf16
, mIDNBlacklist
))
640 return NS_ERROR_FAILURE
;
641 CopyUTF16toUTF8(utf16
, out
);
643 // Validation: encode back to ACE and compare the strings
645 nsresult rv
= ConvertUTF8toACE(out
, ace
);
646 NS_ENSURE_SUCCESS(rv
, rv
);
648 if (!ace
.Equals(in
, nsCaseInsensitiveCStringComparator()))
649 return NS_ERROR_FAILURE
;
654 PRBool
nsIDNService::isInWhitelist(const nsACString
&host
)
656 if (mIDNWhitelistPrefBranch
) {
657 // truncate trailing dots first
658 nsCAutoString
tld(host
);
660 PRInt32 pos
= tld
.RFind(".");
661 if (pos
== kNotFound
)
666 // make sure the TLD is ACE for lookup.
668 NS_FAILED(ConvertUTF8toACE(tld
, tld
)))
672 if (NS_SUCCEEDED(mIDNWhitelistPrefBranch
->GetBoolPref(tld
.get(), &safe
)))