1 //* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is Mozilla Effective-TLD Service
17 * The Initial Developer of the Original Code is
19 * Portions created by the Initial Developer are Copyright (C) 2006
20 * the Initial Developer. All Rights Reserved.
23 * Pamela Greene <pamg.bugs@gmail.com> (original author)
24 * Daniel Witte <dwitte@stanford.edu>
25 * Jeff Walden <jwalden+code@mit.edu>
27 * Alternatively, the contents of this file may be used under the terms of
28 * either the GNU General Public License Version 2 or later (the "GPL"), or
29 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
41 // This service uses a compiled-in table (etld_data.inc) of rules describing
42 // TLD-like domain names. For the rule file format and parsing rules, see
43 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
45 #include "nsEffectiveTLDService.h"
46 #include "nsIIDNService.h"
47 #include "nsNetUtil.h"
// Declares, through the NS_IMPL_ISUPPORTS1 macro, that nsEffectiveTLDService
// exposes a single interface: nsIEffectiveTLDService.
50 NS_IMPL_ISUPPORTS1(nsEffectiveTLDService
, nsIEffectiveTLDService
)
52 // ----------------------------------------------------------------------
// File-local table of eTLD rules; its initializer is pulled in verbatim from
// etld_data.inc. Init() walks NS_ARRAY_LENGTH(gEntries) - 1 elements, so the
// final array element is never inserted into the hash (presumably a
// terminator entry -- confirm against etld_data.inc).
54 static const ETLDEntry gEntries
[] =
55 #include "etld_data.inc"
58 // ----------------------------------------------------------------------
// Init: prepares the service for lookups.
//   1. Initializes mHash, sized to the number of gEntries rows that will be
//      inserted (array length minus one).
//   2. Fetches the IDN service into mIDNService; NormalizeHostname() calls
//      mIDNService->ConvertUTF8toACE() for non-ASCII hostnames.
//   3. Inserts each gEntries row into mHash, keyed by its domain string, and
//      points the hash entry back at the static row.
// Returns NS_ERROR_OUT_OF_MEMORY when the hash cannot be initialized or an
// entry cannot be added, and the failing rv when the IDN service lookup fails.
61 nsEffectiveTLDService::Init()
63 // We'll probably have to rehash at least once, since nsTHashtable doesn't
64 // use a perfect hash, but at least we'll save a few rehashes along the way.
65 // Next optimization here is to precompute the hash using something like
66 // gperf, but one step at a time. :-)
67 if (!mHash
.Init(NS_ARRAY_LENGTH(gEntries
) - 1))
68 return NS_ERROR_OUT_OF_MEMORY
;
// rv receives the outcome of the service lookup below.
71 mIDNService
= do_GetService(NS_IDNSERVICE_CONTRACTID
, &rv
);
72 if (NS_FAILED(rv
)) return rv
;
74 // Initialize eTLD hash from static array
75 for (PRUint32 i
= 0; i
< NS_ARRAY_LENGTH(gEntries
) - 1; i
++) {
// Sanity check: the static domain string must already equal its own
// normalized form, otherwise hash lookups on normalized hosts would miss it.
// NOTE(review): NormalizeHostname() is invoked inside NS_ASSERTION; if that
// macro compiles to nothing in some build, the call disappears with it. That
// is harmless here only because normalizedName is consumed solely by the
// assertion that follows.
77 nsDependentCString
name(gEntries
[i
].domain
);
78 nsCAutoString
normalizedName(gEntries
[i
].domain
);
79 NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName
)),
80 "normalization failure!");
81 NS_ASSERTION(name
.Equals(normalizedName
), "domain not normalized!");
// Key the hash entry by the domain string and attach the static rule data.
83 nsDomainEntry
*entry
= mHash
.PutEntry(gEntries
[i
].domain
);
84 NS_ENSURE_TRUE(entry
, NS_ERROR_OUT_OF_MEMORY
);
85 entry
->SetData(&gEntries
[i
]);
90 // External function for dealing with URIs correctly.
91 // Pulls out the ASCII host of the innermost nsIURI and passes it to
92 // GetBaseDomainInternal() with no additional parts requested.
// GetPublicSuffix
//   aURI          - URI to inspect; a null pointer is rejected by
//                   NS_ENSURE_ARG_POINTER.
//   aPublicSuffix - out: filled in by GetBaseDomainInternal().
// Unwraps aURI to its innermost URI, reads that URI's ASCII host, and requests
// zero additional parts so that only the public suffix itself is produced.
// A failure from GetAsciiHost() is returned unchanged; otherwise the result of
// GetBaseDomainInternal() is returned.
94 nsEffectiveTLDService::GetPublicSuffix(nsIURI
*aURI
,
95 nsACString
&aPublicSuffix
)
97 NS_ENSURE_ARG_POINTER(aURI
);
// NS_GetInnermostURI() may yield null; that is rejected the same way as a
// null argument.
99 nsCOMPtr
<nsIURI
> innerURI
= NS_GetInnermostURI(aURI
);
100 NS_ENSURE_ARG_POINTER(innerURI
);
103 nsresult rv
= innerURI
->GetAsciiHost(host
);
104 if (NS_FAILED(rv
)) return rv
;
106 return GetBaseDomainInternal(host
, 0, aPublicSuffix
);
109 // External function for dealing with URIs correctly.
110 // Pulls out the ASCII host of the innermost nsIURI and passes it to
111 // GetBaseDomainInternal() with aAdditionalParts + 1 parts requested.
// GetBaseDomain
//   aURI             - URI to inspect; a null pointer is rejected by
//                      NS_ENSURE_ARG_POINTER.
//   aAdditionalParts - number of extra labels wanted beyond the base domain.
//   aBaseDomain      - out: filled in by GetBaseDomainInternal().
// Unwraps aURI to its innermost URI, reads that URI's ASCII host, and forwards
// to GetBaseDomainInternal(). A failure from GetAsciiHost() is returned
// unchanged.
113 nsEffectiveTLDService::GetBaseDomain(nsIURI
*aURI
,
114 PRUint32 aAdditionalParts
,
115 nsACString
&aBaseDomain
)
117 NS_ENSURE_ARG_POINTER(aURI
);
119 nsCOMPtr
<nsIURI
> innerURI
= NS_GetInnermostURI(aURI
);
120 NS_ENSURE_ARG_POINTER(innerURI
);
123 nsresult rv
= innerURI
->GetAsciiHost(host
);
124 if (NS_FAILED(rv
)) return rv
;
// The internal routine counts parts relative to the public suffix
// (GetPublicSuffix passes 0), so the base domain needs one more label.
// NOTE(review): aAdditionalParts + 1 wraps to 0 when aAdditionalParts is the
// maximum PRUint32, which would request the bare public suffix -- confirm
// whether callers can pass such a value.
126 return GetBaseDomainInternal(host
, aAdditionalParts
+ 1, aBaseDomain
);
129 // External function for dealing with a host string directly: finds the public
130 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
// GetPublicSuffixFromHost
//   aHostname     - hostname to examine; it is copied, so the caller's string
//                   is not modified.
//   aPublicSuffix - out: filled in by GetBaseDomainInternal().
// Normalizes a private copy of the hostname with NormalizeHostname() and, if
// that succeeds, requests zero additional parts from GetBaseDomainInternal().
// A normalization failure is returned unchanged.
132 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString
&aHostname
,
133 nsACString
&aPublicSuffix
)
135 // Create a mutable copy of the hostname and normalize it to ACE.
136 // This will fail if the hostname includes invalid characters.
137 nsCAutoString
normHostname(aHostname
);
138 nsresult rv
= NormalizeHostname(normHostname
);
139 if (NS_FAILED(rv
)) return rv
;
141 return GetBaseDomainInternal(normHostname
, 0, aPublicSuffix
);
144 // External function for dealing with a host string directly: finds the base
145 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
146 // requested. See GetBaseDomainInternal().
// GetBaseDomainFromHost
//   aHostname        - hostname to examine; it is copied, so the caller's
//                      string is not modified.
//   aAdditionalParts - number of extra labels wanted beyond the base domain.
//   aBaseDomain      - out: filled in by GetBaseDomainInternal().
// Normalizes a private copy of the hostname with NormalizeHostname() and, if
// that succeeds, forwards to GetBaseDomainInternal(). A normalization failure
// is returned unchanged.
148 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString
&aHostname
,
149 PRUint32 aAdditionalParts
,
150 nsACString
&aBaseDomain
)
152 // Create a mutable copy of the hostname and normalize it to ACE.
153 // This will fail if the hostname includes invalid characters.
154 nsCAutoString
normHostname(aHostname
);
155 nsresult rv
= NormalizeHostname(normHostname
);
156 if (NS_FAILED(rv
)) return rv
;
// One more part than requested, because the internal count starts at the
// public suffix (GetPublicSuffixFromHost passes 0).
// NOTE(review): aAdditionalParts + 1 wraps to 0 for the maximum PRUint32 --
// confirm whether callers can pass such a value.
158 return GetBaseDomainInternal(normHostname
, aAdditionalParts
+ 1, aBaseDomain
);
161 // Finds the base domain for a host, with requested number of additional parts.
162 // This will fail, generating an error, if the host is empty, if it is an
163 // IPv4/IPv6 address, or if more subdomain parts are requested than are
164 // available. Hostnames with invalid characters are rejected earlier, when the
165 // *FromHost() callers normalize; callers pass an already ACE/ASCII host.
// GetBaseDomainInternal
//   aHostname        - in/out: host to examine. It is taken by non-const
//                      reference and is truncated in place to drop a trailing
//                      dot, so callers hand in a scratch copy.
//   aAdditionalParts - number of labels to keep to the left of the matched
//                      eTLD (0 = the eTLD only).
//   aBaseDomain      - out: the substring of aHostname running from the chosen
//                      label to the end, with the trailing dot re-appended
//                      when the input had one.
// Error returns visible in this block:
//   NS_ERROR_INVALID_ARG                - aHostname is empty.
//   NS_ERROR_HOST_IS_IP_ADDRESS         - PR_StringToNetAddr() parses the host.
//   NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS - the host ran out of labels before
//                                         aAdditionalParts was used up.
167 nsEffectiveTLDService::GetBaseDomainInternal(nsCString
&aHostname
,
168 PRUint32 aAdditionalParts
,
169 nsACString
&aBaseDomain
)
171 if (aHostname
.IsEmpty())
172 return NS_ERROR_INVALID_ARG
;
174 // chomp any trailing dot, and keep track of it for later
175 PRBool trailingDot
= aHostname
.Last() == '.';
// NOTE(review): presumably this truncation is guarded by trailingDot; the
// guard is not visible in these lines -- confirm.
177 aHostname
.Truncate(aHostname
.Length() - 1);
179 // Check if we're dealing with an IPv4/IPv6 hostname, and return
181 PRStatus result
= PR_StringToNetAddr(aHostname
.get(), &addr
);
182 if (result
== PR_SUCCESS
)
183 return NS_ERROR_HOST_IS_IP_ADDRESS
;
185 // Walk up the domain tree, most specific to least specific,
186 // looking for matches at each level. Note that a given level may
187 // have multiple attributes (e.g. IsWild() and IsNormal()).
// Cursor roles for the walk:
//   prevDomain - the level examined just before currDomain (one label longer);
//                null on the first level.
//   currDomain - the suffix of the host currently being looked up.
//   nextDot    - first '.' inside currDomain, or null at the top level.
//   end        - one past the last character of the (truncated) host.
//   eTLD       - start of the effective TLD; defaults to the whole host.
188 const char *prevDomain
= nsnull
;
189 const char *currDomain
= aHostname
.get();
190 const char *nextDot
= strchr(currDomain
, '.');
191 const char *end
= currDomain
+ aHostname
.Length();
192 const char *eTLD
= currDomain
;
194 nsDomainEntry
*entry
= mHash
.GetEntry(currDomain
);
// NOTE(review): entry is dereferenced below; a null check on GetEntry()'s
// result is not visible in these lines -- confirm it exists.
// The wildcard test comes first, so a level flagged both wild and normal is
// handled as a wildcard whenever a more specific level (prevDomain) exists.
196 if (entry
->IsWild() && prevDomain
) {
197 // wildcard rules imply an eTLD one level inferior to the match.
201 } else if (entry
->IsNormal() || !nextDot
) {
202 // specific match, or we've hit the top domain level
206 } else if (entry
->IsException()) {
207 // exception rules imply an eTLD one level superior to the match.
214 // we've hit the top domain level; use it by default.
// No decision at this level: strip the leftmost label and try again.
219 prevDomain
= currDomain
;
220 currDomain
= nextDot
+ 1;
221 nextDot
= strchr(currDomain
, '.');
224 // count off the number of requested domains.
225 const char *begin
= aHostname
.get();
226 const char *iter
= eTLD
;
// iter moves left from the eTLD start. Whenever it lands on a '.', the
// post-decrement tests aAdditionalParts against 0 and then decrements it, so
// each separator crossed uses up one requested part.
231 if (*(--iter
) == '.' && aAdditionalParts
-- == 0) {
// Parts still outstanding here mean the host had too few labels.
238 if (aAdditionalParts
!= 0)
239 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
241 aBaseDomain
= Substring(iter
, end
);
242 // add on the trailing dot, if applicable
244 aBaseDomain
.Append('.');
249 // Normalizes the given hostname, component by component. ASCII/ACE
250 // components are lower-cased, and UTF-8 components are normalized per
251 // RFC 3454 and converted to ACE.
// NormalizeHostname
//   aHostname - in/out: rewritten in place.
// A hostname containing any non-ASCII character is converted with
// mIDNService->ConvertUTF8toACE(), using the same string as source and
// destination. ToLowerCase() is also applied to the string.
// mIDNService is assigned in Init(), so Init() must have run before a
// non-ASCII hostname is passed in.
// NOTE(review): the conversion is applied to the whole hostname in one call;
// any per-component handling would have to happen inside the IDN service --
// confirm.
253 nsEffectiveTLDService::NormalizeHostname(nsCString
&aHostname
)
255 if (!IsASCII(aHostname
)) {
256 nsresult rv
= mIDNService
->ConvertUTF8toACE(aHostname
, aHostname
);
261 ToLowerCase(aHostname
);