1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // This service reads a file of rules describing TLD-like domain names. For a
8 // complete description of the expected file format and parsing rules, see
9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/Components.h"
13 #include "mozilla/HashFunctions.h"
14 #include "mozilla/MemoryReporting.h"
15 #include "mozilla/ResultExtensions.h"
16 #include "mozilla/TextUtils.h"
17 #include "mozilla/Try.h"
19 #include "MainThreadUtils.h"
20 #include "nsContentUtils.h"
22 #include "nsEffectiveTLDService.h"
26 #include "nsNetUtil.h"
27 #include "nsServiceManagerUtils.h"
28 #include "mozilla/net/DNS.h"
30 namespace etld_dafsa
{
32 // Generated file that includes kDafsa
33 #include "etld_data.inc"
35 } // namespace etld_dafsa
37 using namespace mozilla
;
39 NS_IMPL_ISUPPORTS(nsEffectiveTLDService
, nsIEffectiveTLDService
,
42 // ----------------------------------------------------------------------
// File-local pointer to the service object. The destructor compares it
// against `this`, and the assertion message in GetInstance() states that it
// must have been initialized in nsEffectiveTLDService::Init().
44 static nsEffectiveTLDService
* gService
= nullptr;
// Constructor: initializes mGraph from the generated etld_dafsa::kDafsa table
// (pulled in through etld_data.inc).
46 nsEffectiveTLDService::nsEffectiveTLDService() : mGraph(etld_dafsa::kDafsa
) {}
// Initializes the service. Asserts that it is called on the main thread and
// registers this object as a weak memory reporter.
// NOTE(review): the condition guarding the NS_ERROR_ALREADY_INITIALIZED return
// is not visible in this excerpt -- presumably a check that gService is
// already set; confirm against the full file.
48 nsresult
nsEffectiveTLDService::Init() {
49 MOZ_ASSERT(NS_IsMainThread());
52 return NS_ERROR_ALREADY_INITIALIZED
;
56 RegisterWeakMemoryReporter(this);
// Destructor: undoes the weak memory reporter registration made in Init().
61 nsEffectiveTLDService::~nsEffectiveTLDService() {
62 UnregisterWeakMemoryReporter(this);
63 // See https://bugzilla.mozilla.org/show_bug.cgi?id=1786310#c15
// Only acts on gService when it still refers to this object.
// NOTE(review): the body of this branch is not visible here -- presumably it
// clears gService; confirm against the full file.
64 if (gService
== this) {
// Returns a raw pointer to the service object.
// The service is requested through mozilla::components::EffectiveTLD::Service()
// and held in a local nsCOMPtr; the assertion message below documents the
// expectation that Init() has set gService by then.
// NOTE(review): the early-return and return statements are not visible in this
// excerpt -- presumably gService is what gets returned; confirm.
70 nsEffectiveTLDService
* nsEffectiveTLDService::GetInstance() {
74 nsCOMPtr
<nsIEffectiveTLDService
> tldService
;
75 tldService
= mozilla::components::EffectiveTLD::Service();
81 "gService must have been initialized in nsEffectiveTLDService::Init");
// Defines EffectiveTLDServiceMallocSizeOf, the size-of function that
// CollectReports() hands to SizeOfIncludingThis().
85 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf
)
87 // The amount of heap memory measured here is tiny. It used to be bigger when
88 // nsEffectiveTLDService used a separate hash table instead of binary search.
89 // Nonetheless, we keep this code here in anticipation of bug 1083971 which will
90 // change ETLDEntries::entries to a heap-allocated array modifiable at runtime.
//
// Memory reporter callback. Emits a single KIND_HEAP report under the path
// "explicit/network/effective-TLD-service" whose amount is
// SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf).
// aHandleReport, aData and aAnonymize are the standard reporter arguments;
// none of them is referenced directly in the visible lines.
// NOTE(review): MOZ_COLLECT_REPORT presumably consumes aHandleReport/aData
// implicitly; the units argument and the return statement are not visible in
// this excerpt.
92 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback
* aHandleReport
,
93 nsISupports
* aData
, bool aAnonymize
) {
94 MOZ_COLLECT_REPORT("explicit/network/effective-TLD-service", KIND_HEAP
,
96 SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf
),
97 "Memory used by the effective TLD service.");
// Measures the memory used by this object with the supplied size-of function.
// The running total starts with the allocation holding `this`.
// NOTE(review): the remainder of the body (further additions, the return) is
// not visible in this excerpt.
102 size_t nsEffectiveTLDService::SizeOfIncludingThis(
103 mozilla::MallocSizeOf aMallocSizeOf
) {
104 size_t n
= aMallocSizeOf(this);
109 // External function for dealing with URIs correctly.
110 // Pulls out the host portion from an nsIURI, and calls through to
111 // GetBaseDomainInternal() with zero additional parts.
//
// aURI: URI to examine; a null pointer is rejected by NS_ENSURE_ARG_POINTER.
// aPublicSuffix: receives the public suffix.
113 nsEffectiveTLDService::GetPublicSuffix(nsIURI
* aURI
,
114 nsACString
& aPublicSuffix
) {
115 NS_ENSURE_ARG_POINTER(aURI
);
// The innermost URI's host is used.
// NOTE(review): the declaration of `host` and the check of `rv` are not
// visible in this excerpt.
118 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
// 0 additional parts: only the suffix itself is requested. `false` means the
// result is not restricted to known public suffixes.
123 return GetBaseDomainInternal(host
, 0, false, aPublicSuffix
);
// Same as GetPublicSuffix(), except that GetBaseDomainInternal() is called
// with aOnlyKnownPublicSuffix set to true.
//
// aURI: URI to examine; a null pointer is rejected by NS_ENSURE_ARG_POINTER.
// aPublicSuffix: receives the public suffix.
127 nsEffectiveTLDService::GetKnownPublicSuffix(nsIURI
* aURI
,
128 nsACString
& aPublicSuffix
) {
129 NS_ENSURE_ARG_POINTER(aURI
);
// NOTE(review): the declaration of `host` and the check of `rv` are not
// visible in this excerpt.
132 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
137 return GetBaseDomainInternal(host
, 0, true, aPublicSuffix
);
140 // External function for dealing with URIs correctly.
141 // Pulls out the host portion from an nsIURI, and calls through to
142 // GetBaseDomainInternal().
//
// aURI: URI to examine; a null pointer is rejected by NS_ENSURE_ARG_POINTER.
// aAdditionalParts: extra labels requested beyond the base domain.
// aBaseDomain: receives the base domain.
144 nsEffectiveTLDService::GetBaseDomain(nsIURI
* aURI
, uint32_t aAdditionalParts
,
145 nsACString
& aBaseDomain
) {
146 NS_ENSURE_ARG_POINTER(aURI
);
// Reject values that would be negative once treated as int32_t, which is the
// type GetBaseDomainInternal() takes.
147 NS_ENSURE_TRUE(((int32_t)aAdditionalParts
) >= 0, NS_ERROR_INVALID_ARG
);
// NOTE(review): the declaration of `host` and the check of `rv` are not
// visible in this excerpt.
150 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
// +1: GetPublicSuffix() passes 0 for the bare suffix, so the base domain
// needs one more label than the caller's count.
155 return GetBaseDomainInternal(host
, aAdditionalParts
+ 1, false, aBaseDomain
);
158 // External function for dealing with URIs to get a schemeless site.
159 // Calls through to GetBaseDomain(), handling IP addresses and aliases by
160 // just returning their serialized host.
//
// aURI: URI to examine; a null pointer is rejected by NS_ENSURE_ARG_POINTER.
// aSite: receives the schemeless site.
162 nsEffectiveTLDService::GetSchemelessSite(nsIURI
* aURI
, nsACString
& aSite
) {
163 NS_ENSURE_ARG_POINTER(aURI
);
165 nsresult rv
= GetBaseDomain(aURI
, 0, aSite
);
// These two errors are not treated as failures here: the host itself (with
// brackets for IPv6) is used as the site instead, and rv is replaced by the
// result of that call.
166 if (rv
== NS_ERROR_HOST_IS_IP_ADDRESS
||
167 rv
== NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
) {
168 rv
= nsContentUtils::GetHostOrIPv6WithBrackets(aURI
, aSite
);
173 // Variant of GetSchemelessSite which accepts a host string instead of a URI.
//
// aHostname: host to examine; an empty string yields NS_ERROR_FAILURE.
// aSite: receives the schemeless site.
// NOTE(review): the line of the signature declaring aSite is not visible in
// this excerpt.
175 nsEffectiveTLDService::GetSchemelessSiteFromHost(const nsACString
& aHostname
,
177 NS_ENSURE_TRUE(!aHostname
.IsEmpty(), NS_ERROR_FAILURE
);
179 nsresult rv
= GetBaseDomainFromHost(aHostname
, 0, aSite
);
// For IP addresses and hosts with too few domain levels, fall back to the
// host string itself, passed through MaybeFixIPv6Host().
180 if (rv
== NS_ERROR_HOST_IS_IP_ADDRESS
||
181 rv
== NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
) {
182 aSite
.Assign(aHostname
);
183 nsContentUtils::MaybeFixIPv6Host(aSite
);
190 // External function for dealing with URIs to get site correctly.
191 // Calls through to GetSchemelessSite(), and serializes with the scheme and
// "://" prepended.
//
// aURI: URI to examine; a null pointer is rejected by NS_ENSURE_ARG_POINTER.
// aSite: receives scheme + "://" + schemeless site.
// Errors from GetScheme() and GetSchemelessSite() are propagated;
// NS_ERROR_INVALID_ARG is returned for the two rejected cases below.
194 nsEffectiveTLDService::GetSite(nsIURI
* aURI
, nsACString
& aSite
) {
195 NS_ENSURE_ARG_POINTER(aURI
);
197 nsAutoCString scheme
;
198 nsresult rv
= aURI
->GetScheme(scheme
);
199 NS_ENSURE_SUCCESS(rv
, rv
);
201 nsAutoCString schemeless
;
202 rv
= GetSchemelessSite(aURI
, schemeless
);
203 NS_ENSURE_SUCCESS(rv
, rv
);
205 // aURI (and thus BaseDomain) may be the string '.'. If so, fail.
206 if (schemeless
.Length() == 1 && schemeless
.Last() == '.') {
207 return NS_ERROR_INVALID_ARG
;
210 // Reject any URIs without a host that aren't file:// URIs.
211 if (schemeless
.IsEmpty() && !aURI
->SchemeIs("file")) {
212 return NS_ERROR_INVALID_ARG
;
// Reserve the exact final length up front (3 is the length of "://"), then
// build the result piece by piece.
215 aSite
.SetCapacity(scheme
.Length() + 3 + schemeless
.Length());
216 aSite
.Append(scheme
);
217 aSite
.Append("://"_ns
);
218 aSite
.Append(schemeless
);
223 // External function for dealing with a host string directly: finds the public
224 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
//
// aHostname: host to examine.
// aPublicSuffix: receives the public suffix.
226 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString
& aHostname
,
227 nsACString
& aPublicSuffix
) {
228 // This will fail if the hostname includes invalid characters.
229 nsAutoCString normHostname
;
// The converted copy, not aHostname, is what gets passed on; the internal
// lookup takes a mutable nsCString.
// NOTE(review): the check of `rv` is not visible in this excerpt.
230 nsresult rv
= NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname
, normHostname
);
// 0 additional parts: only the suffix itself is requested.
235 return GetBaseDomainInternal(normHostname
, 0, false, aPublicSuffix
);
// Same as GetPublicSuffixFromHost(), except that GetBaseDomainInternal() is
// called with aOnlyKnownPublicSuffix set to true.
//
// aHostname: host to examine.
// aPublicSuffix: receives the public suffix.
239 nsEffectiveTLDService::GetKnownPublicSuffixFromHost(const nsACString
& aHostname
,
240 nsACString
& aPublicSuffix
) {
241 // This will fail if the hostname includes invalid characters.
242 nsAutoCString normHostname
;
// NOTE(review): the check of `rv` is not visible in this excerpt.
243 nsresult rv
= NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname
, normHostname
);
248 return GetBaseDomainInternal(normHostname
, 0, true, aPublicSuffix
);
251 // External function for dealing with a host string directly: finds the base
252 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
253 // requested. See GetBaseDomainInternal().
//
// aHostname: host to examine.
// aAdditionalParts: extra labels requested beyond the base domain.
// aBaseDomain: receives the base domain.
255 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString
& aHostname
,
256 uint32_t aAdditionalParts
,
257 nsACString
& aBaseDomain
) {
// Reject values that would be negative once treated as int32_t, which is the
// type GetBaseDomainInternal() takes.
258 NS_ENSURE_TRUE(((int32_t)aAdditionalParts
) >= 0, NS_ERROR_INVALID_ARG
);
260 // This will fail if the hostname includes invalid characters.
261 nsAutoCString normHostname
;
// NOTE(review): the check of `rv` is not visible in this excerpt.
262 nsresult rv
= NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname
, normHostname
);
// +1: the public-suffix entry points pass 0 for the bare suffix, so the base
// domain needs one more label than the caller's count.
267 return GetBaseDomainInternal(normHostname
, aAdditionalParts
+ 1, false,
// Host-string entry point that calls GetBaseDomainInternal() with the special
// aAdditionalParts value -1, which selects that function's dedicated
// negative-count branch.
// NOTE(review): presumably this yields aHostname with its leading label
// removed; that branch is only partly visible in this excerpt -- confirm.
//
// aHostname: host to examine.
// aBaseDomain: receives the result.
272 nsEffectiveTLDService::GetNextSubDomain(const nsACString
& aHostname
,
273 nsACString
& aBaseDomain
) {
274 // This will fail if the hostname includes invalid characters.
275 nsAutoCString normHostname
;
// NOTE(review): the check of `rv` is not visible in this excerpt.
276 nsresult rv
= NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname
, normHostname
);
281 return GetBaseDomainInternal(normHostname
, -1, false, aBaseDomain
);
284 // Finds the base domain for a host, with requested number of additional parts.
285 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
286 // if more subdomain parts are requested than are available, or if the hostname
287 // includes characters that are not valid in a URL. Normalization is performed
288 // on the host string and the result will be in UTF8.
//
// aHostname: host to examine; modified in place (a trailing dot is removed).
// aAdditionalParts: number of labels to keep in front of the public suffix.
//   Callers in this file pass 0 for the suffix alone, their own count + 1 for
//   base-domain queries, and -1 from GetNextSubDomain().
// aOnlyKnownPublicSuffix: when true and no lookup in mGraph matched,
//   aBaseDomain is truncated (see the check after the lookup loop).
// aBaseDomain: receives the result.
// Returns NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS, NS_ERROR_INVALID_ARG or
// NS_ERROR_HOST_IS_IP_ADDRESS on the failure paths visible below.
289 nsresult
nsEffectiveTLDService::GetBaseDomainInternal(
290 nsCString
& aHostname
, int32_t aAdditionalParts
, bool aOnlyKnownPublicSuffix
,
291 nsACString
& aBaseDomain
) {
// Values that the mGraph.Lookup() result is compared against below.
292 const int kExceptionRule
= 1;
293 const int kWildcardRule
= 2;
295 if (aHostname
.IsEmpty()) {
296 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
299 // chomp any trailing dot, and keep track of it for later
300 bool trailingDot
= aHostname
.Last() == '.';
// NOTE(review): the guard for this truncation (presumably `if (trailingDot)`)
// is not visible in this excerpt.
302 aHostname
.Truncate(aHostname
.Length() - 1);
305 // check the edge cases of the host being '.' or having a second trailing '.',
306 // since subsequent checks won't catch it.
307 if (aHostname
.IsEmpty() || aHostname
.Last() == '.') {
308 return NS_ERROR_INVALID_ARG
;
311 // Lookup in the cache if this is a normal query. This is restricted to
312 // main thread-only as the cache is not thread-safe.
// "Normal" here means aAdditionalParts == 1; the cache is keyed on the
// (already dot-stripped) hostname.
313 Maybe
<TldCache::Entry
> entry
;
314 if (aAdditionalParts
== 1 && NS_IsMainThread()) {
315 auto p
= mMruTable
.Lookup(aHostname
);
// Failures are cached too: a cached error code is returned as-is.
317 if (NS_FAILED(p
.Data().mResult
)) {
318 return p
.Data().mResult
;
321 // There was a match, just return the cached value.
322 aBaseDomain
= p
.Data().mBaseDomain
;
// NOTE(review): the condition guarding this append (presumably trailingDot)
// is not visible in this excerpt.
324 aBaseDomain
.Append('.');
333 // Check if we're dealing with an IPv4/IPv6 hostname, and return
// NS_ERROR_HOST_IS_IP_ADDRESS if so.
334 if (mozilla::net::HostIsIPLiteral(aHostname
)) {
335 // Update the MRU table if in use.
// NOTE(review): the check that `entry` holds a value is not visible here.
337 entry
->Set(TLDCacheEntry
{aHostname
, ""_ns
, NS_ERROR_HOST_IS_IP_ADDRESS
});
340 return NS_ERROR_HOST_IS_IP_ADDRESS
;
343 // Walk up the domain tree, most specific to least specific,
344 // looking for matches at each level. Note that a given level may
345 // have multiple attributes (e.g. IsWild() and IsNormal()).
// prevDomain/currDomain/nextDot/end all point into aHostname's buffer.
346 const char* prevDomain
= nullptr;
347 const char* currDomain
= aHostname
.get();
348 const char* nextDot
= strchr(currDomain
, '.');
349 const char* end
= currDomain
+ aHostname
.Length();
350 // Default value of *eTLD is currDomain as set in the while loop below
351 const char* eTLD
= nullptr;
352 bool hasKnownPublicSuffix
= false;
354 // sanity check the string we're about to look up: it should not begin
355 // with a '.'; this would mean the hostname began with a '.' or had an
356 // embedded '..' sequence.
357 if (*currDomain
== '.') {
358 // Update the MRU table if in use.
360 entry
->Set(TLDCacheEntry
{aHostname
, ""_ns
, NS_ERROR_INVALID_ARG
});
363 return NS_ERROR_INVALID_ARG
;
366 // Perform the lookup.
// The key is the part of the hostname from currDomain to the end.
367 const int result
= mGraph
.Lookup(Substring(currDomain
, end
));
369 if (result
!= Dafsa::kKeyNotFound
) {
370 hasKnownPublicSuffix
= true;
371 if (result
== kWildcardRule
&& prevDomain
) {
372 // wildcard rules imply an eTLD one level inferior to the match.
376 if (result
!= kExceptionRule
|| !nextDot
) {
377 // specific match, or we've hit the top domain level
381 if (result
== kExceptionRule
) {
382 // exception rules imply an eTLD one level superior to the match.
389 // we've hit the top domain level; use it by default.
// Advance one label towards the top of the domain tree.
394 prevDomain
= currDomain
;
395 currDomain
= nextDot
+ 1;
396 nextDot
= strchr(currDomain
, '.');
// Callers that asked for known suffixes only get an empty result when no
// lookup matched.
// NOTE(review): the return that presumably follows the Truncate() is not
// visible in this excerpt.
399 if (aOnlyKnownPublicSuffix
&& !hasKnownPublicSuffix
) {
400 aBaseDomain
.Truncate();
404 const char *begin
, *iter
;
// -1 is the special value passed by GetNextSubDomain().
405 if (aAdditionalParts
< 0) {
406 NS_ASSERTION(aAdditionalParts
== -1,
407 "aAdditionalParts can't be negative and different from -1");
// Scan forward from the start of the host until the eTLD or the first dot.
409 for (iter
= aHostname
.get(); iter
!= eTLD
&& *iter
!= '.'; iter
++) {
417 aAdditionalParts
= 0;
420 // count off the number of requested domains.
421 begin
= aHostname
.get();
// Step backwards one character at a time; each '.' found consumes one
// requested part, and the test succeeds once the count has reached zero.
429 if (*(--iter
) == '.' && aAdditionalParts
-- == 0) {
// A non-zero count here means the host did not have enough labels.
437 if (aAdditionalParts
!= 0) {
438 // Update the MRU table if in use.
// NOTE(review): the line opening this call (presumably `entry->Set(`) is not
// visible in this excerpt.
441 TLDCacheEntry
{aHostname
, ""_ns
, NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
});
444 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
// The result is the tail of the hostname starting at iter.
447 aBaseDomain
= Substring(iter
, end
);
449 // Update the MRU table if in use.
// The value is cached before any trailing dot is re-appended below.
451 entry
->Set(TLDCacheEntry
{aHostname
, nsCString(aBaseDomain
), NS_OK
});
454 // add on the trailing dot, if applicable
456 aBaseDomain
.Append('.');
// Thin wrapper: forwards all three arguments unchanged to net::HasRootDomain()
// and returns its result.
463 nsEffectiveTLDService::HasRootDomain(const nsACString
& aInput
,
464 const nsACString
& aHost
, bool* aResult
) {
465 return net::HasRootDomain(aInput
, aHost
, aResult
);
// URI entry point for HasKnownPublicSuffixFromHost(): extracts the innermost
// URI's host and forwards it together with aResult.
//
// aURI: URI to examine; a null pointer is rejected by NS_ENSURE_ARG_POINTER.
// aResult: out-parameter filled in by HasKnownPublicSuffixFromHost().
469 nsEffectiveTLDService::HasKnownPublicSuffix(nsIURI
* aURI
, bool* aResult
) {
470 NS_ENSURE_ARG_POINTER(aURI
);
// NOTE(review): the declaration of `host` and the check of `rv` are not
// visible in this excerpt.
473 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
478 return HasKnownPublicSuffixFromHost(host
, aResult
);
482 nsEffectiveTLDService::HasKnownPublicSuffixFromHost(const nsACString
& aHostname
,
484 // Create a mutable copy of the hostname and normalize it to ACE.
485 // This will fail if the hostname includes invalid characters.
486 nsAutoCString hostname
;
487 nsresult rv
= NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname
, hostname
);
492 if (hostname
.IsEmpty() || hostname
== ".") {
493 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
496 // Remove any trailing dot ("example.com." should have a valid suffix)
497 if (hostname
.Last() == '.') {
498 hostname
.Truncate(hostname
.Length() - 1);
501 // Check if we can find a suffix on the PSL. Start with the top level domain
502 // (for example "com" in "example.com"). If that isn't on the PSL, continue to
503 // add domain segments from the end (for example for "example.co.za", "za" is
504 // not on the PSL, but "co.za" is).
505 int32_t dotBeforeSuffix
= -1;
508 dotBeforeSuffix
= Substring(hostname
, 0, dotBeforeSuffix
).RFindChar('.');
510 const nsACString
& suffix
= Substring(
511 hostname
, dotBeforeSuffix
== kNotFound
? 0 : dotBeforeSuffix
+ 1);
513 if (mGraph
.Lookup(suffix
) != Dafsa::kKeyNotFound
) {
518 // To save time, only check up to 9 segments. We can be certain at that
519 // point that the PSL doesn't contain a suffix with that many segments if we
520 // didn't find a suffix earlier.
522 } while (dotBeforeSuffix
!= kNotFound
&& i
< 10);