Bug 460926 A11y hierachy is broken on Ubuntu 8.10 (GNOME 2.24), r=Evan.Yan sr=roc
[wine-gecko.git] / netwerk / dns / src / nsIDNService.cpp
blobf81c248995e82abfb0ecf4e1e1cd22fd0384cd42
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 2002
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
23 * Naoki Hotta <nhotta@netscape.com> (original author)
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #include "nsIDNService.h"
40 #include "nsReadableUtils.h"
41 #include "nsCRT.h"
42 #include "nsUnicharUtils.h"
43 #include "nsIServiceManager.h"
44 #include "nsIPrefService.h"
45 #include "nsIPrefBranch.h"
46 #include "nsIPrefBranch2.h"
47 #include "nsIObserverService.h"
48 #include "nsISupportsPrimitives.h"
49 #include "punycode.h"
51 //-----------------------------------------------------------------------------
52 // RFC 1034 - 3.1. Name space specifications and terminology
// Upper bound on the length of a single DNS label ("node"). It is used in this
// file both as the length limit enforced on labels and to size the fixed
// conversion buffers.
53 static const PRUint32 kMaxDNSNodeLen = 63;
55 //-----------------------------------------------------------------------------
// Names of the preferences this service reads.
// Boolean; stored in mMultilingualTestBed.
57 #define NS_NET_PREF_IDNTESTBED "network.IDN_testbed"
// String; copied into mACEPrefix when it is no longer than kACEPrefixLen.
58 #define NS_NET_PREF_IDNPREFIX "network.IDN_prefix"
// String of characters; stored in mIDNBlacklist.
59 #define NS_NET_PREF_IDNBLACKLIST "network.IDN.blacklist_chars"
// Boolean; stored in mShowPunycode.
60 #define NS_NET_PREF_SHOWPUNYCODE "network.IDN_show_punycode"
// Root of the pref branch queried with a TLD by isInWhitelist().
61 #define NS_NET_PREF_IDNWHITELIST "network.IDN.whitelist."
63 inline PRBool isOnlySafeChars(const nsAFlatString& in,
64 const nsAFlatString& blacklist)
66 return (blacklist.IsEmpty() ||
67 in.FindCharInSet(blacklist) == kNotFound);
70 //-----------------------------------------------------------------------------
71 // nsIDNService
72 //-----------------------------------------------------------------------------
74 /* Implementation file */
// Macro-generated nsISupports boilerplate for nsIDNService, naming the three
// interfaces the class exposes.
// NOTE(review): the macro body is not visible here; presumably it provides
// AddRef/Release/QueryInterface - confirm against the XPCOM headers.
75 NS_IMPL_THREADSAFE_ISUPPORTS3(nsIDNService,
76 nsIIDNService,
77 nsIObserver,
78 nsISupportsWeakReference)
80 nsresult nsIDNService::Init()
82 nsCOMPtr<nsIPrefService> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID));
83 if (prefs)
84 prefs->GetBranch(NS_NET_PREF_IDNWHITELIST, getter_AddRefs(mIDNWhitelistPrefBranch));
86 nsCOMPtr<nsIPrefBranch2> prefInternal(do_QueryInterface(prefs));
87 if (prefInternal) {
88 prefInternal->AddObserver(NS_NET_PREF_IDNTESTBED, this, PR_TRUE);
89 prefInternal->AddObserver(NS_NET_PREF_IDNPREFIX, this, PR_TRUE);
90 prefInternal->AddObserver(NS_NET_PREF_IDNBLACKLIST, this, PR_TRUE);
91 prefInternal->AddObserver(NS_NET_PREF_SHOWPUNYCODE, this, PR_TRUE);
92 prefsChanged(prefInternal, nsnull);
95 return NS_OK;
98 NS_IMETHODIMP nsIDNService::Observe(nsISupports *aSubject,
99 const char *aTopic,
100 const PRUnichar *aData)
102 if (!strcmp(aTopic, NS_PREFBRANCH_PREFCHANGE_TOPIC_ID)) {
103 nsCOMPtr<nsIPrefBranch> prefBranch( do_QueryInterface(aSubject) );
104 if (prefBranch)
105 prefsChanged(prefBranch, aData);
107 return NS_OK;
110 void nsIDNService::prefsChanged(nsIPrefBranch *prefBranch, const PRUnichar *pref)
112 if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNTESTBED).Equals(pref)) {
113 PRBool val;
114 if (NS_SUCCEEDED(prefBranch->GetBoolPref(NS_NET_PREF_IDNTESTBED, &val)))
115 mMultilingualTestBed = val;
117 if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNPREFIX).Equals(pref)) {
118 nsXPIDLCString prefix;
119 nsresult rv = prefBranch->GetCharPref(NS_NET_PREF_IDNPREFIX, getter_Copies(prefix));
120 if (NS_SUCCEEDED(rv) && prefix.Length() <= kACEPrefixLen)
121 PL_strncpyz(nsIDNService::mACEPrefix, prefix.get(), kACEPrefixLen + 1);
123 if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNBLACKLIST).Equals(pref)) {
124 nsCOMPtr<nsISupportsString> blacklist;
125 nsresult rv = prefBranch->GetComplexValue(NS_NET_PREF_IDNBLACKLIST,
126 NS_GET_IID(nsISupportsString),
127 getter_AddRefs(blacklist));
128 if (NS_SUCCEEDED(rv))
129 blacklist->ToString(getter_Copies(mIDNBlacklist));
130 else
131 mIDNBlacklist.Truncate();
133 if (!pref || NS_LITERAL_STRING(NS_NET_PREF_SHOWPUNYCODE).Equals(pref)) {
134 PRBool val;
135 if (NS_SUCCEEDED(prefBranch->GetBoolPref(NS_NET_PREF_SHOWPUNYCODE, &val)))
136 mShowPunycode = val;
140 nsIDNService::nsIDNService()
142 // initialize to the official prefix (RFC 3490 "5. ACE prefix")
143 const char kIDNSPrefix[] = "xn--";
144 strcpy(mACEPrefix, kIDNSPrefix);
146 mMultilingualTestBed = PR_FALSE;
148 if (idn_success != idn_nameprep_create(NULL, &mNamePrepHandle))
149 mNamePrepHandle = nsnull;
151 mNormalizer = do_GetService(NS_UNICODE_NORMALIZER_CONTRACTID);
152 /* member initializers and constructor code */
155 nsIDNService::~nsIDNService()
157 idn_nameprep_destroy(mNamePrepHandle);
160 /* ACString ConvertUTF8toACE (in AUTF8String input); */
161 NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACString & ace)
163 nsresult rv;
164 NS_ConvertUTF8toUTF16 ustr(input);
166 // map ideographic period to ASCII period etc.
167 normalizeFullStops(ustr);
170 PRUint32 len, offset;
171 len = 0;
172 offset = 0;
173 nsCAutoString encodedBuf;
175 nsAString::const_iterator start, end;
176 ustr.BeginReading(start);
177 ustr.EndReading(end);
178 ace.Truncate();
180 // encode nodes if non ASCII
181 while (start != end) {
182 len++;
183 if (*start++ == (PRUnichar)'.') {
184 rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf);
185 NS_ENSURE_SUCCESS(rv, rv);
187 ace.Append(encodedBuf);
188 ace.Append('.');
189 offset += len;
190 len = 0;
194 // add extra node for multilingual test bed
195 if (mMultilingualTestBed)
196 ace.AppendLiteral("mltbd.");
197 // encode the last node if non ASCII
198 if (len) {
199 rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf);
200 NS_ENSURE_SUCCESS(rv, rv);
202 ace.Append(encodedBuf);
205 return NS_OK;
208 /* AUTF8String convertACEtoUTF8(in ACString input); */
209 NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString & input, nsACString & _retval)
211 // RFC 3490 - 4.2 ToUnicode
212 // ToUnicode never fails. If any step fails, then the original input
213 // sequence is returned immediately in that step.
215 if (!IsASCII(input)) {
216 _retval.Assign(input);
217 return NS_OK;
220 PRUint32 len = 0, offset = 0;
221 nsCAutoString decodedBuf;
223 nsACString::const_iterator start, end;
224 input.BeginReading(start);
225 input.EndReading(end);
226 _retval.Truncate();
228 // loop and decode nodes
229 while (start != end) {
230 len++;
231 if (*start++ == '.') {
232 if (NS_FAILED(decodeACE(Substring(input, offset, len - 1), decodedBuf))) {
233 _retval.Assign(input);
234 return NS_OK;
237 _retval.Append(decodedBuf);
238 _retval.Append('.');
239 offset += len;
240 len = 0;
243 // decode the last node
244 if (len) {
245 if (NS_FAILED(decodeACE(Substring(input, offset, len), decodedBuf)))
246 _retval.Assign(input);
247 else
248 _retval.Append(decodedBuf);
251 return NS_OK;
254 /* boolean isACE(in ACString input); */
255 NS_IMETHODIMP nsIDNService::IsACE(const nsACString & input, PRBool *_retval)
257 nsACString::const_iterator begin;
258 input.BeginReading(begin);
260 const char *data = begin.get();
261 PRUint32 dataLen = begin.size_forward();
263 // look for the ACE prefix in the input string. it may occur
264 // at the beginning of any segment in the domain name. for
265 // example: "www.xn--ENCODED.com"
267 const char *p = PL_strncasestr(data, mACEPrefix, dataLen);
269 *_retval = p && (p == data || *(p - 1) == '.');
270 return NS_OK;
273 /* AUTF8String normalize(in AUTF8String input); */
274 NS_IMETHODIMP nsIDNService::Normalize(const nsACString & input, nsACString & output)
276 // protect against bogus input
277 NS_ENSURE_TRUE(IsUTF8(input), NS_ERROR_UNEXPECTED);
279 NS_ConvertUTF8toUTF16 inUTF16(input);
280 normalizeFullStops(inUTF16);
282 // pass the domain name to stringprep label by label
283 nsAutoString outUTF16, outLabel;
285 PRUint32 len = 0, offset = 0;
286 nsresult rv;
287 nsAString::const_iterator start, end;
288 inUTF16.BeginReading(start);
289 inUTF16.EndReading(end);
291 while (start != end) {
292 len++;
293 if (*start++ == PRUnichar('.')) {
294 rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel);
295 NS_ENSURE_SUCCESS(rv, rv);
297 outUTF16.Append(outLabel);
298 outUTF16.Append(PRUnichar('.'));
299 offset += len;
300 len = 0;
303 if (len) {
304 rv = stringPrep(Substring(inUTF16, offset, len), outLabel);
305 NS_ENSURE_SUCCESS(rv, rv);
307 outUTF16.Append(outLabel);
310 CopyUTF16toUTF8(outUTF16, output);
311 if (!isOnlySafeChars(outUTF16, mIDNBlacklist))
312 return ConvertUTF8toACE(output, output);
314 return NS_OK;
317 NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString & input, PRBool * _isASCII, nsACString & _retval)
319 // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
320 // Else, if host is already UTF-8, then make sure it is normalized per IDN.
322 nsresult rv;
324 if (IsASCII(input)) {
325 // first, canonicalize the host to lowercase, for whitelist lookup
326 _retval = input;
327 ToLowerCase(_retval);
329 PRBool isACE;
330 IsACE(_retval, &isACE);
332 if (isACE && !mShowPunycode && isInWhitelist(_retval)) {
333 // ConvertACEtoUTF8() can't fail, but might return the original ACE string
334 nsCAutoString temp(_retval);
335 ConvertACEtoUTF8(temp, _retval);
336 *_isASCII = IsASCII(_retval);
337 } else {
338 *_isASCII = PR_TRUE;
340 } else {
341 // We have to normalize the hostname before testing against the domain
342 // whitelist (see bug 315411), and to ensure the entire string gets
343 // normalized.
344 rv = Normalize(input, _retval);
345 if (NS_FAILED(rv)) return rv;
347 if (mShowPunycode && NS_SUCCEEDED(ConvertUTF8toACE(_retval, _retval))) {
348 *_isASCII = PR_TRUE;
349 return NS_OK;
352 // normalization could result in an ASCII-only hostname. alternatively, if
353 // the host is converted to ACE by the normalizer, then the host may contain
354 // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694, and bug 309311.
355 *_isASCII = IsASCII(_retval);
356 if (!*_isASCII && !isInWhitelist(_retval)) {
357 *_isASCII = PR_TRUE;
358 return ConvertUTF8toACE(_retval, _retval);
362 return NS_OK;
365 //-----------------------------------------------------------------------------
367 static void utf16ToUcs4(const nsAString& in, PRUint32 *out, PRUint32 outBufLen, PRUint32 *outLen)
369 PRUint32 i = 0;
370 nsAString::const_iterator start, end;
371 in.BeginReading(start);
372 in.EndReading(end);
374 while (start != end) {
375 PRUnichar curChar;
377 curChar= *start++;
379 if (start != end &&
380 NS_IS_HIGH_SURROGATE(curChar) &&
381 NS_IS_LOW_SURROGATE(*start)) {
382 out[i] = SURROGATE_TO_UCS4(curChar, *start);
383 ++start;
385 else
386 out[i] = curChar;
388 i++;
389 if (i >= outBufLen) {
390 NS_ERROR("input too big, the result truncated");
391 out[outBufLen-1] = (PRUint32)'\0';
392 *outLen = outBufLen-1;
393 return;
396 out[i] = (PRUint32)'\0';
397 *outLen = i;
400 static void ucs4toUtf16(const PRUint32 *in, nsAString& out)
402 while (*in) {
403 if (!IS_IN_BMP(*in)) {
404 out.Append((PRUnichar) H_SURROGATE(*in));
405 out.Append((PRUnichar) L_SURROGATE(*in));
407 else
408 out.Append((PRUnichar) *in);
409 in++;
413 static nsresult punycode(const char* prefix, const nsAString& in, nsACString& out)
415 PRUint32 ucs4Buf[kMaxDNSNodeLen + 1];
416 PRUint32 ucs4Len;
417 utf16ToUcs4(in, ucs4Buf, kMaxDNSNodeLen, &ucs4Len);
419 // need maximum 20 bits to encode 16 bit Unicode character
420 // (include null terminator)
421 const PRUint32 kEncodedBufSize = kMaxDNSNodeLen * 20 / 8 + 1 + 1;
422 char encodedBuf[kEncodedBufSize];
423 punycode_uint encodedLength = kEncodedBufSize;
425 enum punycode_status status = punycode_encode(ucs4Len,
426 ucs4Buf,
427 nsnull,
428 &encodedLength,
429 encodedBuf);
431 if (punycode_success != status ||
432 encodedLength >= kEncodedBufSize)
433 return NS_ERROR_FAILURE;
435 encodedBuf[encodedLength] = '\0';
436 out.Assign(nsDependentCString(prefix) + nsDependentCString(encodedBuf));
438 return NS_OK;
441 static nsresult encodeToRACE(const char* prefix, const nsAString& in, nsACString& out)
443 // need maximum 20 bits to encode 16 bit Unicode character
444 // (include null terminator)
445 const PRUint32 kEncodedBufSize = kMaxDNSNodeLen * 20 / 8 + 1 + 1;
447 // set up a work buffer for RACE encoder
448 PRUnichar temp[kMaxDNSNodeLen + 2];
449 temp[0] = 0xFFFF; // set a place holder (to be filled by get_compress_mode)
450 temp[in.Length() + 1] = (PRUnichar)'\0';
452 nsAString::const_iterator start, end;
453 in.BeginReading(start);
454 in.EndReading(end);
456 for (PRUint32 i = 1; start != end; i++)
457 temp[i] = *start++;
459 // encode nodes if non ASCII
461 char encodedBuf[kEncodedBufSize];
462 idn_result_t result = race_compress_encode((const unsigned short *) temp,
463 get_compress_mode((unsigned short *) temp + 1),
464 encodedBuf, kEncodedBufSize);
465 if (idn_success != result)
466 return NS_ERROR_FAILURE;
468 out.Assign(prefix);
469 out.Append(encodedBuf);
471 return NS_OK;
474 // RFC 3454
476 // 1) Map -- For each character in the input, check if it has a mapping
477 // and, if so, replace it with its mapping. This is described in section 3.
479 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
480 // normalization. This is described in section 4.
482 // 3) Prohibit -- Check for any characters that are not allowed in the
483 // output. If any are found, return an error. This is described in section
484 // 5.
486 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
487 // are found, make sure that the whole string satisfies the requirements
488 // for bidirectional strings. If the string does not satisfy the requirements
489 // for bidirectional strings, return an error. This is described in section 6.
491 nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out)
493 if (!mNamePrepHandle || !mNormalizer)
494 return NS_ERROR_FAILURE;
496 nsresult rv = NS_OK;
497 PRUint32 ucs4Buf[kMaxDNSNodeLen + 1];
498 PRUint32 ucs4Len;
499 utf16ToUcs4(in, ucs4Buf, kMaxDNSNodeLen, &ucs4Len);
501 // map
502 idn_result_t idn_err;
504 PRUint32 namePrepBuf[kMaxDNSNodeLen * 3]; // map up to three characters
505 idn_err = idn_nameprep_map(mNamePrepHandle, (const PRUint32 *) ucs4Buf,
506 (PRUint32 *) namePrepBuf, kMaxDNSNodeLen * 3);
507 NS_ENSURE_TRUE(idn_err == idn_success, NS_ERROR_FAILURE);
509 nsAutoString namePrepStr;
510 ucs4toUtf16(namePrepBuf, namePrepStr);
511 if (namePrepStr.Length() >= kMaxDNSNodeLen)
512 return NS_ERROR_FAILURE;
514 // normalize
515 nsAutoString normlizedStr;
516 rv = mNormalizer->NormalizeUnicodeNFKC(namePrepStr, normlizedStr);
517 if (normlizedStr.Length() >= kMaxDNSNodeLen)
518 return NS_ERROR_FAILURE;
520 // prohibit
521 const PRUint32 *found = nsnull;
522 idn_err = idn_nameprep_isprohibited(mNamePrepHandle,
523 (const PRUint32 *) ucs4Buf, &found);
524 if (idn_err != idn_success || found)
525 return NS_ERROR_FAILURE;
527 // check bidi
528 idn_err = idn_nameprep_isvalidbidi(mNamePrepHandle,
529 (const PRUint32 *) ucs4Buf, &found);
530 if (idn_err != idn_success || found)
531 return NS_ERROR_FAILURE;
533 // set the result string
534 out.Assign(normlizedStr);
536 return rv;
539 nsresult nsIDNService::encodeToACE(const nsAString& in, nsACString& out)
541 // RACE encode is supported for existing testing environment
542 if (!strcmp("bq--", mACEPrefix))
543 return encodeToRACE(mACEPrefix, in, out);
545 // use punycoce
546 return punycode(mACEPrefix, in, out);
549 nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out)
551 nsresult rv = NS_OK;
553 out.Truncate();
555 if (in.Length() > kMaxDNSNodeLen) {
556 NS_ERROR("IDN node too large");
557 return NS_ERROR_FAILURE;
560 if (IsASCII(in))
561 LossyCopyUTF16toASCII(in, out);
562 else {
563 nsAutoString strPrep;
564 rv = stringPrep(in, strPrep);
565 if (NS_SUCCEEDED(rv)) {
566 if (IsASCII(strPrep))
567 LossyCopyUTF16toASCII(strPrep, out);
568 else
569 rv = encodeToACE(strPrep, out);
573 if (out.Length() > kMaxDNSNodeLen) {
574 NS_ERROR("IDN node too large");
575 return NS_ERROR_FAILURE;
578 return rv;
581 // RFC 3490
582 // 1) Whenever dots are used as label separators, the following characters
583 // MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
584 // stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
585 // stop).
587 void nsIDNService::normalizeFullStops(nsAString& s)
589 nsAString::const_iterator start, end;
590 s.BeginReading(start);
591 s.EndReading(end);
592 PRInt32 index = 0;
594 while (start != end) {
595 switch (*start) {
596 case 0x3002:
597 case 0xFF0E:
598 case 0xFF61:
599 s.Replace(index, 1, NS_LITERAL_STRING("."));
600 break;
601 default:
602 break;
604 start++;
605 index++;
609 nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out)
611 PRBool isAce;
612 IsACE(in, &isAce);
613 if (!isAce) {
614 out.Assign(in);
615 return NS_OK;
618 // RFC 3490 - 4.2 ToUnicode
619 // The ToUnicode output never contains more code points than its input.
620 punycode_uint output_length = in.Length() - kACEPrefixLen + 1;
621 punycode_uint *output = new punycode_uint[output_length];
622 NS_ENSURE_TRUE(output, NS_ERROR_OUT_OF_MEMORY);
624 enum punycode_status status = punycode_decode(in.Length() - kACEPrefixLen,
625 PromiseFlatCString(in).get() + kACEPrefixLen,
626 &output_length,
627 output,
628 nsnull);
629 if (status != punycode_success) {
630 delete [] output;
631 return NS_ERROR_FAILURE;
634 // UCS4 -> UTF8
635 output[output_length] = 0;
636 nsAutoString utf16;
637 ucs4toUtf16(output, utf16);
638 delete [] output;
639 if (!isOnlySafeChars(utf16, mIDNBlacklist))
640 return NS_ERROR_FAILURE;
641 CopyUTF16toUTF8(utf16, out);
643 // Validation: encode back to ACE and compare the strings
644 nsCAutoString ace;
645 nsresult rv = ConvertUTF8toACE(out, ace);
646 NS_ENSURE_SUCCESS(rv, rv);
648 if (!ace.Equals(in, nsCaseInsensitiveCStringComparator()))
649 return NS_ERROR_FAILURE;
651 return NS_OK;
654 PRBool nsIDNService::isInWhitelist(const nsACString &host)
656 if (mIDNWhitelistPrefBranch) {
657 // truncate trailing dots first
658 nsCAutoString tld(host);
659 tld.Trim(".");
660 PRInt32 pos = tld.RFind(".");
661 if (pos == kNotFound)
662 return PR_FALSE;
664 tld.Cut(0, pos + 1);
666 // make sure the TLD is ACE for lookup.
667 if (!IsASCII(tld) &&
668 NS_FAILED(ConvertUTF8toACE(tld, tld)))
669 return PR_FALSE;
671 PRBool safe;
672 if (NS_SUCCEEDED(mIDNWhitelistPrefBranch->GetBoolPref(tld.get(), &safe)))
673 return safe;
676 return PR_FALSE;