Bug 470455 - test_database_sync_embed_visits.js leaks, r=sdwilsh
[wine-gecko.git] / netwerk / dns / src / nsEffectiveTLDService.cpp
blob07d4456bd075400c093cd9d336d93e4bbe92c90b
1 //* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is Mozilla Effective-TLD Service
17 * The Initial Developer of the Original Code is
18 * Google Inc.
19 * Portions created by the Initial Developer are Copyright (C) 2006
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
23 * Pamela Greene <pamg.bugs@gmail.com> (original author)
24 * Daniel Witte <dwitte@stanford.edu>
25 * Jeff Walden <jwalden+code@mit.edu>
27 * Alternatively, the contents of this file may be used under the terms of
28 * either the GNU General Public License Version 2 or later (the "GPL"), or
29 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
41 // This service reads a file of rules describing TLD-like domain names. For a
42 // complete description of the expected file format and parsing rules, see
43 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
45 #include "nsEffectiveTLDService.h"
46 #include "nsIIDNService.h"
47 #include "nsNetUtil.h"
48 #include "prnetdb.h"
50 NS_IMPL_ISUPPORTS1(nsEffectiveTLDService, nsIEffectiveTLDService)
52 // ----------------------------------------------------------------------
54 static const ETLDEntry gEntries[] =
55 #include "etld_data.inc"
58 // ----------------------------------------------------------------------
60 nsresult
61 nsEffectiveTLDService::Init()
63 // We'll probably have to rehash at least once, since nsTHashtable doesn't
64 // use a perfect hash, but at least we'll save a few rehashes along the way.
65 // Next optimization here is to precompute the hash using something like
66 // gperf, but one step at a time. :-)
67 if (!mHash.Init(NS_ARRAY_LENGTH(gEntries) - 1))
68 return NS_ERROR_OUT_OF_MEMORY;
70 nsresult rv;
71 mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
72 if (NS_FAILED(rv)) return rv;
74 // Initialize eTLD hash from static array
75 for (PRUint32 i = 0; i < NS_ARRAY_LENGTH(gEntries) - 1; i++) {
76 #ifdef DEBUG
77 nsDependentCString name(gEntries[i].domain);
78 nsCAutoString normalizedName(gEntries[i].domain);
79 NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
80 "normalization failure!");
81 NS_ASSERTION(name.Equals(normalizedName), "domain not normalized!");
82 #endif
83 nsDomainEntry *entry = mHash.PutEntry(gEntries[i].domain);
84 NS_ENSURE_TRUE(entry, NS_ERROR_OUT_OF_MEMORY);
85 entry->SetData(&gEntries[i]);
87 return NS_OK;
90 // External function for dealing with URI's correctly.
91 // Pulls out the host portion from an nsIURI, and calls through to
92 // GetPublicSuffixFromHost().
93 NS_IMETHODIMP
94 nsEffectiveTLDService::GetPublicSuffix(nsIURI *aURI,
95 nsACString &aPublicSuffix)
97 NS_ENSURE_ARG_POINTER(aURI);
99 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
100 NS_ENSURE_ARG_POINTER(innerURI);
102 nsCAutoString host;
103 nsresult rv = innerURI->GetAsciiHost(host);
104 if (NS_FAILED(rv)) return rv;
106 return GetBaseDomainInternal(host, 0, aPublicSuffix);
109 // External function for dealing with URI's correctly.
110 // Pulls out the host portion from an nsIURI, and calls through to
111 // GetBaseDomainFromHost().
112 NS_IMETHODIMP
113 nsEffectiveTLDService::GetBaseDomain(nsIURI *aURI,
114 PRUint32 aAdditionalParts,
115 nsACString &aBaseDomain)
117 NS_ENSURE_ARG_POINTER(aURI);
119 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
120 NS_ENSURE_ARG_POINTER(innerURI);
122 nsCAutoString host;
123 nsresult rv = innerURI->GetAsciiHost(host);
124 if (NS_FAILED(rv)) return rv;
126 return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain);
129 // External function for dealing with a host string directly: finds the public
130 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
131 NS_IMETHODIMP
132 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname,
133 nsACString &aPublicSuffix)
135 // Create a mutable copy of the hostname and normalize it to ACE.
136 // This will fail if the hostname includes invalid characters.
137 nsCAutoString normHostname(aHostname);
138 nsresult rv = NormalizeHostname(normHostname);
139 if (NS_FAILED(rv)) return rv;
141 return GetBaseDomainInternal(normHostname, 0, aPublicSuffix);
144 // External function for dealing with a host string directly: finds the base
145 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
146 // requested. See GetBaseDomainInternal().
147 NS_IMETHODIMP
148 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname,
149 PRUint32 aAdditionalParts,
150 nsACString &aBaseDomain)
152 // Create a mutable copy of the hostname and normalize it to ACE.
153 // This will fail if the hostname includes invalid characters.
154 nsCAutoString normHostname(aHostname);
155 nsresult rv = NormalizeHostname(normHostname);
156 if (NS_FAILED(rv)) return rv;
158 return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain);
161 // Finds the base domain for a host, with requested number of additional parts.
162 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
163 // if more subdomain parts are requested than are available, or if the hostname
164 // includes characters that are not valid in a URL. Normalization is performed
165 // on the host string and the result will be in UTF8.
166 nsresult
167 nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
168 PRUint32 aAdditionalParts,
169 nsACString &aBaseDomain)
171 if (aHostname.IsEmpty())
172 return NS_ERROR_INVALID_ARG;
174 // chomp any trailing dot, and keep track of it for later
175 PRBool trailingDot = aHostname.Last() == '.';
176 if (trailingDot)
177 aHostname.Truncate(aHostname.Length() - 1);
179 // Check if we're dealing with an IPv4/IPv6 hostname, and return
180 PRNetAddr addr;
181 PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr);
182 if (result == PR_SUCCESS)
183 return NS_ERROR_HOST_IS_IP_ADDRESS;
185 // Walk up the domain tree, most specific to least specific,
186 // looking for matches at each level. Note that a given level may
187 // have multiple attributes (e.g. IsWild() and IsNormal()).
188 const char *prevDomain = nsnull;
189 const char *currDomain = aHostname.get();
190 const char *nextDot = strchr(currDomain, '.');
191 const char *end = currDomain + aHostname.Length();
192 const char *eTLD = currDomain;
193 while (1) {
194 nsDomainEntry *entry = mHash.GetEntry(currDomain);
195 if (entry) {
196 if (entry->IsWild() && prevDomain) {
197 // wildcard rules imply an eTLD one level inferior to the match.
198 eTLD = prevDomain;
199 break;
201 } else if (entry->IsNormal() || !nextDot) {
202 // specific match, or we've hit the top domain level
203 eTLD = currDomain;
204 break;
206 } else if (entry->IsException()) {
207 // exception rules imply an eTLD one level superior to the match.
208 eTLD = nextDot + 1;
209 break;
213 if (!nextDot) {
214 // we've hit the top domain level; use it by default.
215 eTLD = currDomain;
216 break;
219 prevDomain = currDomain;
220 currDomain = nextDot + 1;
221 nextDot = strchr(currDomain, '.');
224 // count off the number of requested domains.
225 const char *begin = aHostname.get();
226 const char *iter = eTLD;
227 while (1) {
228 if (iter == begin)
229 break;
231 if (*(--iter) == '.' && aAdditionalParts-- == 0) {
232 ++iter;
233 ++aAdditionalParts;
234 break;
238 if (aAdditionalParts != 0)
239 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
241 aBaseDomain = Substring(iter, end);
242 // add on the trailing dot, if applicable
243 if (trailingDot)
244 aBaseDomain.Append('.');
246 return NS_OK;
249 // Normalizes the given hostname, component by component. ASCII/ACE
250 // components are lower-cased, and UTF-8 components are normalized per
251 // RFC 3454 and converted to ACE.
252 nsresult
253 nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname)
255 if (!IsASCII(aHostname)) {
256 nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
257 if (NS_FAILED(rv))
258 return rv;
261 ToLowerCase(aHostname);
262 return NS_OK;