Bug 1931425 - Limit how often moz-label's #setStyles runs r=reusable-components-revie...
[gecko.git] / netwerk / dns / nsEffectiveTLDService.cpp
blob6976ced6877b88e4f3af0538be52bd5df7adc3c2
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // This service reads a file of rules describing TLD-like domain names. For a
8 // complete description of the expected file format and parsing rules, see
9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/Components.h"
13 #include "mozilla/HashFunctions.h"
14 #include "mozilla/MemoryReporting.h"
15 #include "mozilla/ResultExtensions.h"
16 #include "mozilla/TextUtils.h"
17 #include "mozilla/Try.h"
19 #include "MainThreadUtils.h"
20 #include "nsContentUtils.h"
21 #include "nsCRT.h"
22 #include "nsEffectiveTLDService.h"
23 #include "nsIFile.h"
24 #include "nsIURI.h"
25 #include "nsNetCID.h"
26 #include "nsNetUtil.h"
27 #include "nsServiceManagerUtils.h"
28 #include "mozilla/net/DNS.h"
30 namespace etld_dafsa {
32 // Generated file that includes kDafsa
33 #include "etld_data.inc"
35 } // namespace etld_dafsa
37 using namespace mozilla;
39 NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
40 nsIMemoryReporter)
42 // ----------------------------------------------------------------------
44 static nsEffectiveTLDService* gService = nullptr;
46 nsEffectiveTLDService::nsEffectiveTLDService() : mGraph(etld_dafsa::kDafsa) {}
48 nsresult nsEffectiveTLDService::Init() {
49 MOZ_ASSERT(NS_IsMainThread());
51 if (gService) {
52 return NS_ERROR_ALREADY_INITIALIZED;
55 gService = this;
56 RegisterWeakMemoryReporter(this);
58 return NS_OK;
61 nsEffectiveTLDService::~nsEffectiveTLDService() {
62 UnregisterWeakMemoryReporter(this);
63 // See https://bugzilla.mozilla.org/show_bug.cgi?id=1786310#c15
64 if (gService == this) {
65 gService = nullptr;
69 // static
70 nsEffectiveTLDService* nsEffectiveTLDService::GetInstance() {
71 if (gService) {
72 return gService;
74 nsCOMPtr<nsIEffectiveTLDService> tldService;
75 tldService = mozilla::components::EffectiveTLD::Service();
76 if (!tldService) {
77 return nullptr;
79 MOZ_ASSERT(
80 gService,
81 "gService must have been initialized in nsEffectiveTLDService::Init");
82 return gService;
85 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf)
87 // The amount of heap memory measured here is tiny. It used to be bigger when
88 // nsEffectiveTLDService used a separate hash table instead of binary search.
89 // Nonetheless, we keep this code here in anticipation of bug 1083971 which will
90 // change ETLDEntries::entries to a heap-allocated array modifiable at runtime.
91 NS_IMETHODIMP
92 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport,
93 nsISupports* aData, bool aAnonymize) {
94 MOZ_COLLECT_REPORT("explicit/network/effective-TLD-service", KIND_HEAP,
95 UNITS_BYTES,
96 SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf),
97 "Memory used by the effective TLD service.");
99 return NS_OK;
102 size_t nsEffectiveTLDService::SizeOfIncludingThis(
103 mozilla::MallocSizeOf aMallocSizeOf) {
104 size_t n = aMallocSizeOf(this);
106 return n;
109 // External function for dealing with URI's correctly.
110 // Pulls out the host portion from an nsIURI, and calls through to
111 // GetPublicSuffixFromHost().
112 NS_IMETHODIMP
113 nsEffectiveTLDService::GetPublicSuffix(nsIURI* aURI,
114 nsACString& aPublicSuffix) {
115 NS_ENSURE_ARG_POINTER(aURI);
117 nsAutoCString host;
118 nsresult rv = NS_GetInnermostURIHost(aURI, host);
119 if (NS_FAILED(rv)) {
120 return rv;
123 return GetBaseDomainInternal(host, 0, false, aPublicSuffix);
126 NS_IMETHODIMP
127 nsEffectiveTLDService::GetKnownPublicSuffix(nsIURI* aURI,
128 nsACString& aPublicSuffix) {
129 NS_ENSURE_ARG_POINTER(aURI);
131 nsAutoCString host;
132 nsresult rv = NS_GetInnermostURIHost(aURI, host);
133 if (NS_FAILED(rv)) {
134 return rv;
137 return GetBaseDomainInternal(host, 0, true, aPublicSuffix);
140 // External function for dealing with URI's correctly.
141 // Pulls out the host portion from an nsIURI, and calls through to
142 // GetBaseDomainFromHost().
143 NS_IMETHODIMP
144 nsEffectiveTLDService::GetBaseDomain(nsIURI* aURI, uint32_t aAdditionalParts,
145 nsACString& aBaseDomain) {
146 NS_ENSURE_ARG_POINTER(aURI);
147 NS_ENSURE_TRUE(((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
149 nsAutoCString host;
150 nsresult rv = NS_GetInnermostURIHost(aURI, host);
151 if (NS_FAILED(rv)) {
152 return rv;
155 return GetBaseDomainInternal(host, aAdditionalParts + 1, false, aBaseDomain);
158 // External function for dealing with URIs to get a schemeless site.
159 // Calls through to GetBaseDomain(), handling IP addresses and aliases by
160 // just returning their serialized host.
161 NS_IMETHODIMP
162 nsEffectiveTLDService::GetSchemelessSite(nsIURI* aURI, nsACString& aSite) {
163 NS_ENSURE_ARG_POINTER(aURI);
165 nsresult rv = GetBaseDomain(aURI, 0, aSite);
166 if (rv == NS_ERROR_HOST_IS_IP_ADDRESS ||
167 rv == NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS) {
168 rv = nsContentUtils::GetHostOrIPv6WithBrackets(aURI, aSite);
170 return rv;
173 // Variant of GetSchemelessSite which accepts a host string instead of a URI.
174 NS_IMETHODIMP
175 nsEffectiveTLDService::GetSchemelessSiteFromHost(const nsACString& aHostname,
176 nsACString& aSite) {
177 NS_ENSURE_TRUE(!aHostname.IsEmpty(), NS_ERROR_FAILURE);
179 nsresult rv = GetBaseDomainFromHost(aHostname, 0, aSite);
180 if (rv == NS_ERROR_HOST_IS_IP_ADDRESS ||
181 rv == NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS) {
182 aSite.Assign(aHostname);
183 nsContentUtils::MaybeFixIPv6Host(aSite);
185 return NS_OK;
187 return rv;
190 // External function for dealing with URIs to get site correctly.
191 // Calls through to GetSchemelessSite(), and serializes with the scheme and
192 // "://" prepended.
193 NS_IMETHODIMP
194 nsEffectiveTLDService::GetSite(nsIURI* aURI, nsACString& aSite) {
195 NS_ENSURE_ARG_POINTER(aURI);
197 nsAutoCString scheme;
198 nsresult rv = aURI->GetScheme(scheme);
199 NS_ENSURE_SUCCESS(rv, rv);
201 nsAutoCString schemeless;
202 rv = GetSchemelessSite(aURI, schemeless);
203 NS_ENSURE_SUCCESS(rv, rv);
205 // aURI (and thus BaseDomain) may be the string '.'. If so, fail.
206 if (schemeless.Length() == 1 && schemeless.Last() == '.') {
207 return NS_ERROR_INVALID_ARG;
210 // Reject any URIs without a host that aren't file:// URIs.
211 if (schemeless.IsEmpty() && !aURI->SchemeIs("file")) {
212 return NS_ERROR_INVALID_ARG;
215 aSite.SetCapacity(scheme.Length() + 3 + schemeless.Length());
216 aSite.Append(scheme);
217 aSite.Append("://"_ns);
218 aSite.Append(schemeless);
220 return NS_OK;
223 // External function for dealing with a host string directly: finds the public
224 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
225 NS_IMETHODIMP
226 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString& aHostname,
227 nsACString& aPublicSuffix) {
228 // This will fail if the hostname includes invalid characters.
229 nsAutoCString normHostname;
230 nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, normHostname);
231 if (NS_FAILED(rv)) {
232 return rv;
235 return GetBaseDomainInternal(normHostname, 0, false, aPublicSuffix);
238 NS_IMETHODIMP
239 nsEffectiveTLDService::GetKnownPublicSuffixFromHost(const nsACString& aHostname,
240 nsACString& aPublicSuffix) {
241 // This will fail if the hostname includes invalid characters.
242 nsAutoCString normHostname;
243 nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, normHostname);
244 if (NS_FAILED(rv)) {
245 return rv;
248 return GetBaseDomainInternal(normHostname, 0, true, aPublicSuffix);
251 // External function for dealing with a host string directly: finds the base
252 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
253 // requested. See GetBaseDomainInternal().
254 NS_IMETHODIMP
255 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString& aHostname,
256 uint32_t aAdditionalParts,
257 nsACString& aBaseDomain) {
258 NS_ENSURE_TRUE(((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
260 // This will fail if the hostname includes invalid characters.
261 nsAutoCString normHostname;
262 nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, normHostname);
263 if (NS_FAILED(rv)) {
264 return rv;
267 return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, false,
268 aBaseDomain);
271 NS_IMETHODIMP
272 nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname,
273 nsACString& aBaseDomain) {
274 // This will fail if the hostname includes invalid characters.
275 nsAutoCString normHostname;
276 nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, normHostname);
277 if (NS_FAILED(rv)) {
278 return rv;
281 return GetBaseDomainInternal(normHostname, -1, false, aBaseDomain);
284 // Finds the base domain for a host, with requested number of additional parts.
285 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
286 // if more subdomain parts are requested than are available, or if the hostname
287 // includes characters that are not valid in a URL. Normalization is performed
288 // on the host string and the result will be in UTF8.
289 nsresult nsEffectiveTLDService::GetBaseDomainInternal(
290 nsCString& aHostname, int32_t aAdditionalParts, bool aOnlyKnownPublicSuffix,
291 nsACString& aBaseDomain) {
292 const int kExceptionRule = 1;
293 const int kWildcardRule = 2;
295 if (aHostname.IsEmpty()) {
296 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
299 // chomp any trailing dot, and keep track of it for later
300 bool trailingDot = aHostname.Last() == '.';
301 if (trailingDot) {
302 aHostname.Truncate(aHostname.Length() - 1);
305 // check the edge cases of the host being '.' or having a second trailing '.',
306 // since subsequent checks won't catch it.
307 if (aHostname.IsEmpty() || aHostname.Last() == '.') {
308 return NS_ERROR_INVALID_ARG;
311 // Lookup in the cache if this is a normal query. This is restricted to
312 // main thread-only as the cache is not thread-safe.
313 Maybe<TldCache::Entry> entry;
314 if (aAdditionalParts == 1 && NS_IsMainThread()) {
315 auto p = mMruTable.Lookup(aHostname);
316 if (p) {
317 if (NS_FAILED(p.Data().mResult)) {
318 return p.Data().mResult;
321 // There was a match, just return the cached value.
322 aBaseDomain = p.Data().mBaseDomain;
323 if (trailingDot) {
324 aBaseDomain.Append('.');
327 return NS_OK;
330 entry = Some(p);
333 // Check if we're dealing with an IPv4/IPv6 hostname, and return
334 if (mozilla::net::HostIsIPLiteral(aHostname)) {
335 // Update the MRU table if in use.
336 if (entry) {
337 entry->Set(TLDCacheEntry{aHostname, ""_ns, NS_ERROR_HOST_IS_IP_ADDRESS});
340 return NS_ERROR_HOST_IS_IP_ADDRESS;
343 // Walk up the domain tree, most specific to least specific,
344 // looking for matches at each level. Note that a given level may
345 // have multiple attributes (e.g. IsWild() and IsNormal()).
346 const char* prevDomain = nullptr;
347 const char* currDomain = aHostname.get();
348 const char* nextDot = strchr(currDomain, '.');
349 const char* end = currDomain + aHostname.Length();
350 // Default value of *eTLD is currDomain as set in the while loop below
351 const char* eTLD = nullptr;
352 bool hasKnownPublicSuffix = false;
353 while (true) {
354 // sanity check the string we're about to look up: it should not begin
355 // with a '.'; this would mean the hostname began with a '.' or had an
356 // embedded '..' sequence.
357 if (*currDomain == '.') {
358 // Update the MRU table if in use.
359 if (entry) {
360 entry->Set(TLDCacheEntry{aHostname, ""_ns, NS_ERROR_INVALID_ARG});
363 return NS_ERROR_INVALID_ARG;
366 // Perform the lookup.
367 const int result = mGraph.Lookup(Substring(currDomain, end));
369 if (result != Dafsa::kKeyNotFound) {
370 hasKnownPublicSuffix = true;
371 if (result == kWildcardRule && prevDomain) {
372 // wildcard rules imply an eTLD one level inferior to the match.
373 eTLD = prevDomain;
374 break;
376 if (result != kExceptionRule || !nextDot) {
377 // specific match, or we've hit the top domain level
378 eTLD = currDomain;
379 break;
381 if (result == kExceptionRule) {
382 // exception rules imply an eTLD one level superior to the match.
383 eTLD = nextDot + 1;
384 break;
388 if (!nextDot) {
389 // we've hit the top domain level; use it by default.
390 eTLD = currDomain;
391 break;
394 prevDomain = currDomain;
395 currDomain = nextDot + 1;
396 nextDot = strchr(currDomain, '.');
399 if (aOnlyKnownPublicSuffix && !hasKnownPublicSuffix) {
400 aBaseDomain.Truncate();
401 return NS_OK;
404 const char *begin, *iter;
405 if (aAdditionalParts < 0) {
406 NS_ASSERTION(aAdditionalParts == -1,
407 "aAdditionalParts can't be negative and different from -1");
409 for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++) {
413 if (iter != eTLD) {
414 iter++;
416 if (iter != eTLD) {
417 aAdditionalParts = 0;
419 } else {
420 // count off the number of requested domains.
421 begin = aHostname.get();
422 iter = eTLD;
424 while (true) {
425 if (iter == begin) {
426 break;
429 if (*(--iter) == '.' && aAdditionalParts-- == 0) {
430 ++iter;
431 ++aAdditionalParts;
432 break;
437 if (aAdditionalParts != 0) {
438 // Update the MRU table if in use.
439 if (entry) {
440 entry->Set(
441 TLDCacheEntry{aHostname, ""_ns, NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS});
444 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
447 aBaseDomain = Substring(iter, end);
449 // Update the MRU table if in use.
450 if (entry) {
451 entry->Set(TLDCacheEntry{aHostname, nsCString(aBaseDomain), NS_OK});
454 // add on the trailing dot, if applicable
455 if (trailingDot) {
456 aBaseDomain.Append('.');
459 return NS_OK;
462 NS_IMETHODIMP
463 nsEffectiveTLDService::HasRootDomain(const nsACString& aInput,
464 const nsACString& aHost, bool* aResult) {
465 return net::HasRootDomain(aInput, aHost, aResult);
468 NS_IMETHODIMP
469 nsEffectiveTLDService::HasKnownPublicSuffix(nsIURI* aURI, bool* aResult) {
470 NS_ENSURE_ARG_POINTER(aURI);
472 nsAutoCString host;
473 nsresult rv = NS_GetInnermostURIHost(aURI, host);
474 if (NS_FAILED(rv)) {
475 return rv;
478 return HasKnownPublicSuffixFromHost(host, aResult);
481 NS_IMETHODIMP
482 nsEffectiveTLDService::HasKnownPublicSuffixFromHost(const nsACString& aHostname,
483 bool* aResult) {
484 // Create a mutable copy of the hostname and normalize it to ACE.
485 // This will fail if the hostname includes invalid characters.
486 nsAutoCString hostname;
487 nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, hostname);
488 if (NS_FAILED(rv)) {
489 return rv;
492 if (hostname.IsEmpty() || hostname == ".") {
493 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
496 // Remove any trailing dot ("example.com." should have a valid suffix)
497 if (hostname.Last() == '.') {
498 hostname.Truncate(hostname.Length() - 1);
501 // Check if we can find a suffix on the PSL. Start with the top level domain
502 // (for example "com" in "example.com"). If that isn't on the PSL, continue to
503 // add domain segments from the end (for example for "example.co.za", "za" is
504 // not on the PSL, but "co.za" is).
505 int32_t dotBeforeSuffix = -1;
506 int8_t i = 0;
507 do {
508 dotBeforeSuffix = Substring(hostname, 0, dotBeforeSuffix).RFindChar('.');
510 const nsACString& suffix = Substring(
511 hostname, dotBeforeSuffix == kNotFound ? 0 : dotBeforeSuffix + 1);
513 if (mGraph.Lookup(suffix) != Dafsa::kKeyNotFound) {
514 *aResult = true;
515 return NS_OK;
518 // To save time, only check up to 9 segments. We can be certain at that
519 // point that the PSL doesn't contain a suffix with that many segments if we
520 // didn't find a suffix earlier.
521 i++;
522 } while (dotBeforeSuffix != kNotFound && i < 10);
524 *aResult = false;
525 return NS_OK;