1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/content_settings/core/common/content_settings_pattern.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "base/strings/string_split.h"
11 #include "base/strings/string_util.h"
12 #include "components/content_settings/core/common/content_settings_pattern_parser.h"
13 #include "net/base/dns_util.h"
14 #include "net/base/net_util.h"
// The component supports only one scheme for simplicity. This holds the single
// scheme registered through
// ContentSettingsPattern::SetNonWildcardDomainNonPortScheme() and stays null
// until that call is made.
const char* non_port_non_domain_wildcard_scheme = nullptr;
// Maps |scheme| to a port string. The visible branches special-case
// url::kHttpScheme and url::kHttpsScheme.
// NOTE(review): the value returned by each branch and the fallback return are
// not shown here -- presumably the standard default ports; confirm.
22 std::string
GetDefaultPort(const std::string
& scheme
) {
23 if (scheme
== url::kHttpScheme
)
25 if (scheme
== url::kHttpsScheme
)
30 // Returns true if |sub_domain| is a sub domain or equals |domain|. E.g.
31 // "mail.google.com" is a sub domain of "google.com" but "evilhost.com" is not a
32 // subdomain of "host.com".
33 bool IsSubDomainOrEqual(const std::string
& sub_domain
,
34 const std::string
& domain
) {
35 // The empty string serves as wildcard. Each domain is a subdomain of the
// wildcard.
// Find the last occurrence of |domain| within |sub_domain|.
39 const size_t match
= sub_domain
.rfind(domain
);
// The test below is true when |domain| does not occur in |sub_domain|, when
// the occurrence starts after index 0 but is not preceded by a '.', or when
// the occurrence does not extend to the end of |sub_domain|.
// NOTE(review): the body of this branch is not shown; presumably it rejects
// the match -- confirm.
40 if (match
== std::string::npos
||
41 (match
> 0 && sub_domain
[match
- 1] != '.') ||
42 (match
+ domain
.length() != sub_domain
.length())) {
48 // Compares two domain names.
// Both names are split on '.' and their labels are compared pairwise with
// std::string::compare(), starting at the last label of each name.
// NOTE(review): the return statements are not shown here; presumably the
// first non-zero label comparison decides the result -- confirm.
49 int CompareDomainNames(const std::string
& str1
, const std::string
& str2
) {
50 std::vector
<std::string
> domain_name1
;
51 std::vector
<std::string
> domain_name2
;
53 base::SplitString(str1
, '.', &domain_name1
);
54 base::SplitString(str2
, '.', &domain_name2
);
// Signed indices so that an empty vector yields -1 and the loop is skipped.
56 int i1
= static_cast<int>(domain_name1
.size()) - 1;
57 int i2
= static_cast<int>(domain_name2
.size()) - 1;
59 while (i1
>= 0 && i2
>= 0) {
60 // domain names are stored in puny code. So it's fine to use the compare
// method of std::string.
62 rv
= domain_name1
[i1
].compare(domain_name2
[i2
]);
75 // The domain names are identical.
79 typedef ContentSettingsPattern::BuilderInterface BuilderInterface
;
83 // ////////////////////////////////////////////////////////////////////////////
84 // ContentSettingsPattern::Builder
// Concrete implementation of ContentSettingsPattern::BuilderInterface. The
// With*() calls record pattern components and Build() produces the resulting
// ContentSettingsPattern.
86 class ContentSettingsPattern::Builder
:
87 public ContentSettingsPattern::BuilderInterface
{
// |use_legacy_validate| makes Build() use LegacyValidate() instead of
// Validate().
89 explicit Builder(bool use_legacy_validate
);
// BuilderInterface overrides. Each With*() call returns a BuilderInterface*,
// which callers in this file use to chain further calls.
93 BuilderInterface
* WithPort(const std::string
& port
) override
;
94 BuilderInterface
* WithPortWildcard() override
;
95 BuilderInterface
* WithHost(const std::string
& host
) override
;
96 BuilderInterface
* WithDomainWildcard() override
;
97 BuilderInterface
* WithScheme(const std::string
& scheme
) override
;
98 BuilderInterface
* WithSchemeWildcard() override
;
99 BuilderInterface
* WithPath(const std::string
& path
) override
;
100 BuilderInterface
* WithPathWildcard() override
;
101 BuilderInterface
* Invalid() override
;
102 ContentSettingsPattern
Build() override
;
105 // Canonicalizes the pattern parts so that they are ASCII only, either
106 // in original (if it was already ASCII) or punycode form. Returns true if
107 // the canonicalization was successful.
108 static bool Canonicalize(PatternParts
* parts
);
110 // Returns true when the pattern |parts| represent a valid pattern.
111 static bool Validate(const PatternParts
& parts
);
// Alternative validity check that Build() applies instead of Validate() when
// use_legacy_validate_ is set.
113 static bool LegacyValidate(const PatternParts
& parts
);
// Selects LegacyValidate() over Validate() in Build(); set by the constructor.
117 bool use_legacy_validate_
;
121 DISALLOW_COPY_AND_ASSIGN(Builder
);
// Stores |use_legacy_validate|, which Build() consults to choose between
// LegacyValidate() and Validate().
// NOTE(review): any other member initializers are not shown here.
124 ContentSettingsPattern::Builder::Builder(bool use_legacy_validate
)
126 use_legacy_validate_(use_legacy_validate
) {}
128 ContentSettingsPattern::Builder::~Builder() {}
// BuilderInterface override for an explicit port: clears the port-wildcard
// flag on parts_.
// NOTE(review): the statements that store |port| and produce the return value
// are not shown; presumably |port| is saved in parts_ and |this| is returned
// for chaining -- confirm.
130 BuilderInterface
* ContentSettingsPattern::Builder::WithPort(
131 const std::string
& port
) {
133 parts_
.is_port_wildcard
= false;
// BuilderInterface override that sets the port-wildcard flag on parts_.
// NOTE(review): the remaining statements (including the return) are not
// shown here.
137 BuilderInterface
* ContentSettingsPattern::Builder::WithPortWildcard() {
139 parts_
.is_port_wildcard
= true;
// BuilderInterface override that supplies the host component.
// NOTE(review): the body is not shown here; presumably |host| is stored in
// parts_ and |this| is returned for chaining -- confirm.
143 BuilderInterface
* ContentSettingsPattern::Builder::WithHost(
144 const std::string
& host
) {
// BuilderInterface override that sets the domain-wildcard flag on parts_.
// NOTE(review): the return statement is not shown here.
149 BuilderInterface
* ContentSettingsPattern::Builder::WithDomainWildcard() {
150 parts_
.has_domain_wildcard
= true;
// BuilderInterface override for an explicit scheme: stores |scheme| in parts_
// and clears the scheme-wildcard flag.
// NOTE(review): the return statement is not shown here.
154 BuilderInterface
* ContentSettingsPattern::Builder::WithScheme(
155 const std::string
& scheme
) {
156 parts_
.scheme
= scheme
;
157 parts_
.is_scheme_wildcard
= false;
// BuilderInterface override that sets the scheme-wildcard flag on parts_.
// NOTE(review): the remaining statements (including the return) are not
// shown here.
161 BuilderInterface
* ContentSettingsPattern::Builder::WithSchemeWildcard() {
163 parts_
.is_scheme_wildcard
= true;
// BuilderInterface override for an explicit path: clears the path-wildcard
// flag on parts_.
// NOTE(review): the statements that store |path| and produce the return value
// are not shown; presumably |path| is saved in parts_ -- confirm.
167 BuilderInterface
* ContentSettingsPattern::Builder::WithPath(
168 const std::string
& path
) {
170 parts_
.is_path_wildcard
= false;
// BuilderInterface override that sets the path-wildcard flag on parts_.
// NOTE(review): the remaining statements (including the return) are not
// shown here.
174 BuilderInterface
* ContentSettingsPattern::Builder::WithPathWildcard() {
176 parts_
.is_path_wildcard
= true;
// BuilderInterface override.
// NOTE(review): the body is not shown here; presumably it marks the pattern
// under construction as invalid -- confirm.
180 BuilderInterface
* ContentSettingsPattern::Builder::Invalid() {
// Produces the ContentSettingsPattern described by the accumulated parts_.
// A default-constructed ContentSettingsPattern is returned when
// canonicalization fails or when canonicalizing a second time changes the
// result.
185 ContentSettingsPattern
ContentSettingsPattern::Builder::Build() {
// NOTE(review): the condition guarding this early return is not shown here.
187 return ContentSettingsPattern();
188 if (!Canonicalize(&parts_
))
189 return ContentSettingsPattern();
// The constructor flag decides which validation routine sets is_valid_.
190 if (use_legacy_validate_
) {
191 is_valid_
= LegacyValidate(parts_
);
193 is_valid_
= Validate(parts_
);
196 return ContentSettingsPattern();
198 // A pattern is invalid if canonicalization is not idempotent.
199 // This check is here because it should be checked no matter what
200 // use_legacy_validate_ is.
// Canonicalize a copy of the already-canonicalized parts and compare the two.
201 PatternParts
parts(parts_
);
202 if (!Canonicalize(&parts
))
203 return ContentSettingsPattern();
204 if (ContentSettingsPattern(parts_
, true) !=
205 ContentSettingsPattern(parts
, true)) {
206 return ContentSettingsPattern();
209 return ContentSettingsPattern(parts_
, is_valid_
);
// Canonicalizes |parts| in place: lower-cases the scheme, normalizes the path
// of file patterns through GURL, and canonicalizes the host with
// net::CanonicalizeHost().
// NOTE(review): the return statements are not shown here.
213 bool ContentSettingsPattern::Builder::Canonicalize(PatternParts
* parts
) {
214 // Canonicalize the scheme part.
215 const std::string
scheme(base::StringToLowerASCII(parts
->scheme
));
216 parts
->scheme
= scheme
;
// For file patterns with an explicit path, build a file URL from the path and
// take back the path GURL reports for it.
218 if (parts
->scheme
== std::string(url::kFileScheme
) &&
219 !parts
->is_path_wildcard
) {
220 GURL
url(std::string(url::kFileScheme
) +
221 std::string(url::kStandardSchemeSeparator
) + parts
->path
);
222 parts
->path
= url
.path();
225 // Canonicalize the host part.
226 const std::string
host(parts
->host
);
227 url::CanonHostInfo host_info
;
228 std::string
canonicalized_host(net::CanonicalizeHost(host
, &host_info
));
// NOTE(review): the statement guarded by this condition is not shown here.
229 if (host_info
.IsIPAddress() && parts
->has_domain_wildcard
)
231 canonicalized_host
= net::TrimEndingDot(canonicalized_host
);
// Only a host without any '*' and with a non-empty canonical form has the
// canonicalized host appended to parts->host.
234 if ((host
.find('*') == std::string::npos
) &&
235 !canonicalized_host
.empty()) {
237 parts
->host
+= canonicalized_host
;
// Validity check that Build() uses when use_legacy_validate_ is false.
// Examines wildcard/value consistency, file patterns, the registered
// non-port scheme, and finally host, port and scheme of other patterns.
// NOTE(review): most return statements are not shown here; the comments
// below describe only the visible conditions.
243 bool ContentSettingsPattern::Builder::Validate(const PatternParts
& parts
) {
244 // Sanity checks first: {scheme, port} wildcards imply empty {scheme, port}.
245 if ((parts
.is_scheme_wildcard
&& !parts
.scheme
.empty()) ||
246 (parts
.is_port_wildcard
&& !parts
.port
.empty())) {
251 // file:// URL patterns have an empty host and port.
252 if (parts
.scheme
== std::string(url::kFileScheme
)) {
253 if (parts
.has_domain_wildcard
|| !parts
.host
.empty() || !parts
.port
.empty())
// With a path wildcard the path itself must be empty.
255 if (parts
.is_path_wildcard
)
256 return parts
.path
.empty();
// Otherwise the path must be non-empty and must not contain '*'.
// NOTE(review): one further condition of this expression is not shown here.
257 return (!parts
.path
.empty() &&
259 parts
.path
.find("*") == std::string::npos
);
262 // If the pattern is for an extension URL test if it is valid.
263 if (IsNonWildcardDomainNonPortScheme(parts
.scheme
) &&
264 parts
.port
.empty() &&
265 !parts
.is_port_wildcard
) {
269 // Non-file patterns are invalid if either the scheme, host or port part is
// empty, unless the corresponding wildcard flag is set.
271 if ((parts
.scheme
.empty() && !parts
.is_scheme_wildcard
) ||
272 (parts
.host
.empty() && !parts
.has_domain_wildcard
) ||
273 (parts
.port
.empty() && !parts
.is_port_wildcard
)) {
// Checks for a literal '*' left inside the host string.
277 if (parts
.host
.find("*") != std::string::npos
)
280 // Test if the scheme is supported or a wildcard.
281 if (!parts
.is_scheme_wildcard
&&
282 parts
.scheme
!= std::string(url::kHttpScheme
) &&
283 parts
.scheme
!= std::string(url::kHttpsScheme
)) {
// Validity check that Build() uses when use_legacy_validate_ is true.
// NOTE(review): the return statements and parts of some conditions are not
// shown here; the comments below describe only the visible conditions.
290 bool ContentSettingsPattern::Builder::LegacyValidate(
291 const PatternParts
& parts
) {
292 // If the pattern is for a "file-pattern" test if it is valid.
293 if (parts
.scheme
== std::string(url::kFileScheme
) &&
294 !parts
.is_scheme_wildcard
&&
295 parts
.host
.empty() &&
299 // If the pattern is for an extension URL test if it is valid.
// Requires an explicit scheme and host with neither a port nor any wildcard.
300 if (IsNonWildcardDomainNonPortScheme(parts
.scheme
) &&
301 !parts
.is_scheme_wildcard
&&
302 !parts
.host
.empty() &&
303 !parts
.has_domain_wildcard
&&
304 parts
.port
.empty() &&
305 !parts
.is_port_wildcard
)
308 // Non-file patterns are invalid if either the scheme, host or port part is
// empty.
// Note that, as written, the scheme and port tests look only at the wildcard
// flags, while the host test also looks at the host string.
310 if ((!parts
.is_scheme_wildcard
) ||
311 (parts
.host
.empty() && !parts
.has_domain_wildcard
) ||
312 (!parts
.is_port_wildcard
))
315 // Test if the scheme is supported or a wildcard.
316 if (!parts
.is_scheme_wildcard
&&
317 parts
.scheme
!= std::string(url::kHttpScheme
) &&
318 parts
.scheme
!= std::string(url::kHttpsScheme
)) {
324 // ////////////////////////////////////////////////////////////////////////////
325 // ContentSettingsPattern::PatternParts
327 ContentSettingsPattern::PatternParts::PatternParts()
328 : is_scheme_wildcard(false),
329 has_domain_wildcard(false),
330 is_port_wildcard(false),
331 is_path_wildcard(false) {}
333 ContentSettingsPattern::PatternParts::~PatternParts() {}
335 // ////////////////////////////////////////////////////////////////////////////
336 // ContentSettingsPattern
339 // The version of the pattern format implemented. Version 1 includes the
340 // following patterns:
341 // - [*.]domain.tld (matches domain.tld and all sub-domains)
342 // - host (matches an exact hostname)
343 // - a.b.c.d (matches an exact IPv4 ip)
344 // - [a:b:c:d:e:f:g:h] (matches an exact IPv6 ip)
345 // - file:///tmp/test.html (a complete URL without a host)
346 // Version 2 adds a resource identifier for plugins.
347 // TODO(jochen): update once this feature is no longer behind a flag.
348 const int ContentSettingsPattern::kContentSettingsPatternVersion
= 1;
// Allocates a new Builder with `new` and returns it as a raw
// BuilderInterface*. The callers in this file immediately wrap the result in
// a scoped_ptr.
// NOTE(review): the parameter list is not shown here; the visible body
// forwards a value named |validate| to the Builder constructor.
351 BuilderInterface
* ContentSettingsPattern::CreateBuilder(
353 return new Builder(validate
);
// Builds a pattern with the scheme, domain and port wildcards set, using a
// builder created with CreateBuilder(true).
// NOTE(review): the end of the chained builder call is not shown here.
357 ContentSettingsPattern
ContentSettingsPattern::Wildcard() {
358 scoped_ptr
<ContentSettingsPattern::BuilderInterface
> builder(
359 ContentSettingsPattern::CreateBuilder(true));
360 builder
->WithSchemeWildcard()->WithDomainWildcard()->WithPortWildcard()->
362 return builder
->Build();
// Builds a pattern from a URL. File URLs yield a scheme + path pattern;
// IP-address hosts yield scheme + host; http and https URLs get a domain
// wildcard in front of the host, and http additionally gets a scheme
// wildcard. The port is then filled in from the URL, or chosen as described
// below when the URL has none.
// NOTE(review): the parameter list and several lines (arguments of some
// WithHost() calls, `else` lines, closing braces) are not shown here.
366 ContentSettingsPattern
ContentSettingsPattern::FromURL(
368 scoped_ptr
<ContentSettingsPattern::BuilderInterface
> builder(
369 ContentSettingsPattern::CreateBuilder(false));
// For filesystem: URLs that carry an inner URL, work on the inner URL.
371 const GURL
* local_url
= &url
;
372 if (url
.SchemeIsFileSystem() && url
.inner_url()) {
373 local_url
= url
.inner_url();
375 if (local_url
->SchemeIsFile()) {
376 builder
->WithScheme(local_url
->scheme())->WithPath(local_url
->path());
378 // Please keep the order of the ifs below as URLs with an IP as host can
379 // also have a "http" scheme.
380 if (local_url
->HostIsIPAddress()) {
381 builder
->WithScheme(local_url
->scheme())->WithHost(local_url
->host());
382 } else if (local_url
->SchemeIs(url::kHttpScheme
)) {
383 builder
->WithSchemeWildcard()->WithDomainWildcard()->WithHost(
385 } else if (local_url
->SchemeIs(url::kHttpsScheme
)) {
386 builder
->WithScheme(local_url
->scheme())->WithDomainWildcard()->WithHost(
389 // Unsupported scheme
// No explicit port in the URL: the https branch uses the default https port;
// the other visible branch uses the port wildcard.
391 if (local_url
->port().empty()) {
392 if (local_url
->SchemeIs(url::kHttpsScheme
))
393 builder
->WithPort(GetDefaultPort(url::kHttpsScheme
));
395 builder
->WithPortWildcard();
// Explicit port in the URL: use it verbatim.
397 builder
->WithPort(local_url
->port());
400 return builder
->Build();
// Builds a pattern from a URL without using any wildcard builder calls: file
// URLs yield scheme + path, other URLs yield scheme + host + port, with the
// port taken from GetDefaultPort() when the URL has none.
// NOTE(review): the parameter list and the `else`/closing-brace lines are not
// shown here.
404 ContentSettingsPattern
ContentSettingsPattern::FromURLNoWildcard(
406 scoped_ptr
<ContentSettingsPattern::BuilderInterface
> builder(
407 ContentSettingsPattern::CreateBuilder(false));
// For filesystem: URLs that carry an inner URL, work on the inner URL.
409 const GURL
* local_url
= &url
;
410 if (url
.SchemeIsFileSystem() && url
.inner_url()) {
411 local_url
= url
.inner_url();
413 if (local_url
->SchemeIsFile()) {
414 builder
->WithScheme(local_url
->scheme())->WithPath(local_url
->path());
416 builder
->WithScheme(local_url
->scheme())->WithHost(local_url
->host());
417 if (local_url
->port().empty()) {
418 builder
->WithPort(GetDefaultPort(local_url
->scheme()));
420 builder
->WithPort(local_url
->port());
423 return builder
->Build();
// Builds a pattern from its string form: |pattern_spec| is handed to
// content_settings::PatternParser::Parse() together with a builder created by
// CreateBuilder(false), and the builder's Build() result is returned.
// NOTE(review): the second argument of the Parse() call is not shown here.
427 ContentSettingsPattern
ContentSettingsPattern::FromString(
428 const std::string
& pattern_spec
) {
429 scoped_ptr
<ContentSettingsPattern::BuilderInterface
> builder(
430 ContentSettingsPattern::CreateBuilder(false));
431 content_settings::PatternParser::Parse(pattern_spec
,
433 return builder
->Build();
// Registers |scheme| as the single scheme that
// IsNonWildcardDomainNonPortScheme() recognizes. Only the pointer is stored,
// not a copy of the characters.
437 void ContentSettingsPattern::SetNonWildcardDomainNonPortScheme(
438 const char* scheme
) {
// Allowed only if nothing is registered yet or the very same pointer is
// passed again; this compares pointers, not string contents.
440 DCHECK(!non_port_non_domain_wildcard_scheme
||
441 non_port_non_domain_wildcard_scheme
== scheme
);
442 non_port_non_domain_wildcard_scheme
= scheme
;
446 bool ContentSettingsPattern::IsNonWildcardDomainNonPortScheme(
447 const std::string
& scheme
) {
448 DCHECK(non_port_non_domain_wildcard_scheme
);
449 return scheme
== non_port_non_domain_wildcard_scheme
;
// Default constructor. Builder::Build() returns a default-constructed pattern
// when canonicalization fails.
// NOTE(review): the initializer list and body are not shown here; presumably
// the pattern is marked invalid -- confirm.
452 ContentSettingsPattern::ContentSettingsPattern()
// Constructs a pattern from already-assembled |parts|. Builder::Build() calls
// this with a validity flag as the second argument.
// NOTE(review): the remaining parameters and the initializer list are not
// shown here.
456 ContentSettingsPattern::ContentSettingsPattern(
457 const PatternParts
& parts
,
// Returns whether |url| is matched by this pattern. For filesystem: URLs with
// an inner URL, the inner URL is matched. Scheme, host and port are checked in
// that order; file URLs are decided on the path alone.
// NOTE(review): most return statements are not shown here; the comments
// below describe only the visible conditions.
463 bool ContentSettingsPattern::Matches(
464 const GURL
& url
) const {
465 // An invalid pattern matches nothing.
469 const GURL
* local_url
= &url
;
470 if (url
.SchemeIsFileSystem() && url
.inner_url()) {
471 local_url
= url
.inner_url();
474 // Match the scheme part.
475 const std::string
scheme(local_url
->scheme());
476 if (!parts_
.is_scheme_wildcard
&&
477 parts_
.scheme
!= scheme
) {
481 // File URLs have no host. Matches if the pattern has the path wildcard set,
482 // or if the path in the URL is identical to the one in the pattern.
483 // For filesystem:file URLs, the path used is the filesystem type, so all
484 // filesystem:file:///temporary/... are equivalent.
485 // TODO(markusheintz): Content settings should be defined for all files on
486 // a machine. Unless there is a good use case for supporting paths for file
487 // patterns, stop supporting path for file patterns.
488 if (!parts_
.is_scheme_wildcard
&& scheme
== url::kFileScheme
)
489 return parts_
.is_path_wildcard
||
490 parts_
.path
== std::string(local_url
->path());
492 // Match the host part.
// The URL host is passed through net::TrimEndingDot() before comparing.
493 const std::string
host(net::TrimEndingDot(local_url
->host()));
// Without a domain wildcard the host is compared for exact equality; the
// IsSubDomainOrEqual() test further down also accepts sub-domains.
494 if (!parts_
.has_domain_wildcard
) {
495 if (parts_
.host
!= host
)
498 if (!IsSubDomainOrEqual(host
, parts_
.host
))
502 // Ignore the port if the scheme doesn't support it.
503 if (IsNonWildcardDomainNonPortScheme(parts_
.scheme
))
506 // Match the port part.
507 std::string
port(local_url
->port());
509 // Use the default port if the port string is empty. GURL returns an empty
510 // string if no port at all was specified or if the default port was
// specified.
513 port
= GetDefaultPort(scheme
);
516 if (!parts_
.is_port_wildcard
&&
517 parts_
.port
!= port
) {
524 bool ContentSettingsPattern::MatchesAllHosts() const {
525 return parts_
.has_domain_wildcard
&& parts_
.host
.empty();
// Returns the string form of the pattern: either the result of
// content_settings::PatternParser::ToString(parts_) or an empty string.
// NOTE(review): the condition choosing between the two returns is not shown
// here; the comment in Compare() states that invalid patterns are represented
// as an empty string.
528 std::string
ContentSettingsPattern::ToString() const {
530 return content_settings::PatternParser::ToString(parts_
);
532 return std::string();
// Relates this pattern to |other|. Validity is considered first; then the
// host, port and scheme relations are computed in that order and the first
// disjoint one is returned. If none is disjoint, the first relation that is
// not IDENTITY (host, then port) is returned, falling back to the scheme
// relation.
535 ContentSettingsPattern::Relation
ContentSettingsPattern::Compare(
536 const ContentSettingsPattern
& other
) const {
537 // Two invalid patterns are identical in the way they behave. They don't match
538 // anything and are represented as an empty string. So it's fair to treat them
// as identical.
// NOTE(review): the return statement of this branch is not shown here.
540 if ((this == &other
) ||
541 (!is_valid_
&& !other
.is_valid_
))
// When exactly one side is invalid: DISJOINT_ORDER_POST if this pattern is
// the invalid one, DISJOINT_ORDER_PRE otherwise.
544 if (!is_valid_
&& other
.is_valid_
)
545 return DISJOINT_ORDER_POST
;
546 if (is_valid_
&& !other
.is_valid_
)
547 return DISJOINT_ORDER_PRE
;
549 // If either host, port or scheme are disjoint return immediately.
550 Relation host_relation
= CompareHost(parts_
, other
.parts_
);
551 if (host_relation
== DISJOINT_ORDER_PRE
||
552 host_relation
== DISJOINT_ORDER_POST
)
553 return host_relation
;
555 Relation port_relation
= ComparePort(parts_
, other
.parts_
);
556 if (port_relation
== DISJOINT_ORDER_PRE
||
557 port_relation
== DISJOINT_ORDER_POST
)
558 return port_relation
;
560 Relation scheme_relation
= CompareScheme(parts_
, other
.parts_
);
561 if (scheme_relation
== DISJOINT_ORDER_PRE
||
562 scheme_relation
== DISJOINT_ORDER_POST
)
563 return scheme_relation
;
// Nothing is disjoint: report the first relation that is not IDENTITY.
565 if (host_relation
!= IDENTITY
)
566 return host_relation
;
567 if (port_relation
!= IDENTITY
)
568 return port_relation
;
569 return scheme_relation
;
572 bool ContentSettingsPattern::operator==(
573 const ContentSettingsPattern
& other
) const {
574 return Compare(other
) == IDENTITY
;
577 bool ContentSettingsPattern::operator!=(
578 const ContentSettingsPattern
& other
) const {
579 return !(*this == other
);
582 bool ContentSettingsPattern::operator<(
583 const ContentSettingsPattern
& other
) const {
584 return Compare(other
) < 0;
587 bool ContentSettingsPattern::operator>(
588 const ContentSettingsPattern
& other
) const {
589 return Compare(other
) > 0;
// Relates the scheme of |parts| to the scheme of |other_parts|. If only
// |parts| has the scheme wildcard the result is SUCCESSOR; if only
// |other_parts| has it the result is PREDECESSOR. Otherwise the scheme
// strings are compared with std::string::compare().
// NOTE(review): the conditions that select among the three final returns are
// not shown here; presumably they test |result| against zero -- confirm.
593 ContentSettingsPattern::Relation
ContentSettingsPattern::CompareScheme(
594 const ContentSettingsPattern::PatternParts
& parts
,
595 const ContentSettingsPattern::PatternParts
& other_parts
) {
596 if (parts
.is_scheme_wildcard
&& !other_parts
.is_scheme_wildcard
)
597 return ContentSettingsPattern::SUCCESSOR
;
598 if (!parts
.is_scheme_wildcard
&& other_parts
.is_scheme_wildcard
)
599 return ContentSettingsPattern::PREDECESSOR
;
601 int result
= parts
.scheme
.compare(other_parts
.scheme
);
603 return ContentSettingsPattern::IDENTITY
;
605 return ContentSettingsPattern::DISJOINT_ORDER_PRE
;
606 return ContentSettingsPattern::DISJOINT_ORDER_POST
;
// Relates the host of |parts| to the host of |other_parts|, distinguishing
// four cases by which side has a domain wildcard. IsSubDomainOrEqual()
// decides containment and CompareDomainNames() orders disjoint hosts.
// NOTE(review): several condition, `else` and closing-brace lines are not
// shown here; the comments below describe only the visible statements.
610 ContentSettingsPattern::Relation
ContentSettingsPattern::CompareHost(
611 const ContentSettingsPattern::PatternParts
& parts
,
612 const ContentSettingsPattern::PatternParts
& other_parts
) {
613 if (!parts
.has_domain_wildcard
&& !other_parts
.has_domain_wildcard
) {
614 // Case 1: No host starts with a wild card
615 int result
= CompareDomainNames(parts
.host
, other_parts
.host
);
617 return ContentSettingsPattern::IDENTITY
;
619 return ContentSettingsPattern::DISJOINT_ORDER_PRE
;
620 return ContentSettingsPattern::DISJOINT_ORDER_POST
;
621 } else if (parts
.has_domain_wildcard
&& !other_parts
.has_domain_wildcard
) {
622 // Case 2: |host| starts with a domain wildcard and |other_host| does not
623 // start with a domain wildcard.
625 // "this" host: [*.]google.com
626 // "other" host: google.com
631 // [*.]mail.google.com
// The other host falls under this wildcard host, so this pattern is the
// SUCCESSOR.
642 if (IsSubDomainOrEqual(other_parts
.host
, parts
.host
)) {
643 return ContentSettingsPattern::SUCCESSOR
;
645 if (CompareDomainNames(parts
.host
, other_parts
.host
) < 0)
646 return ContentSettingsPattern::DISJOINT_ORDER_PRE
;
647 return ContentSettingsPattern::DISJOINT_ORDER_POST
;
649 } else if (!parts
.has_domain_wildcard
&& other_parts
.has_domain_wildcard
) {
650 // Case 3: |host| starts NOT with a domain wildcard and |other_host| starts
651 // with a domain wildcard.
652 if (IsSubDomainOrEqual(parts
.host
, other_parts
.host
)) {
653 return ContentSettingsPattern::PREDECESSOR
;
655 if (CompareDomainNames(parts
.host
, other_parts
.host
) < 0)
656 return ContentSettingsPattern::DISJOINT_ORDER_PRE
;
657 return ContentSettingsPattern::DISJOINT_ORDER_POST
;
659 } else if (parts
.has_domain_wildcard
&& other_parts
.has_domain_wildcard
) {
660 // Case 4: |host| and |other_host| both start with a domain wildcard.
666 // [*.]mail.google.com
672 // [*.]mail.google.com
// Equal hosts are identical; otherwise whichever host contains the other
// decides between SUCCESSOR and PREDECESSOR.
679 if (parts
.host
== other_parts
.host
) {
680 return ContentSettingsPattern::IDENTITY
;
681 } else if (IsSubDomainOrEqual(other_parts
.host
, parts
.host
)) {
682 return ContentSettingsPattern::SUCCESSOR
;
683 } else if (IsSubDomainOrEqual(parts
.host
, other_parts
.host
)) {
684 return ContentSettingsPattern::PREDECESSOR
;
686 if (CompareDomainNames(parts
.host
, other_parts
.host
) < 0)
687 return ContentSettingsPattern::DISJOINT_ORDER_PRE
;
688 return ContentSettingsPattern::DISJOINT_ORDER_POST
;
693 return ContentSettingsPattern::IDENTITY
;
// Relates the port of |parts| to the port of |other_parts|. If only |parts|
// has the port wildcard the result is SUCCESSOR; if only |other_parts| has it
// the result is PREDECESSOR. Otherwise the port strings are compared with
// std::string::compare(), i.e. as text rather than as numbers.
// NOTE(review): the conditions that select among the three final returns are
// not shown here; presumably they test |result| against zero -- confirm.
697 ContentSettingsPattern::Relation
ContentSettingsPattern::ComparePort(
698 const ContentSettingsPattern::PatternParts
& parts
,
699 const ContentSettingsPattern::PatternParts
& other_parts
) {
700 if (parts
.is_port_wildcard
&& !other_parts
.is_port_wildcard
)
701 return ContentSettingsPattern::SUCCESSOR
;
702 if (!parts
.is_port_wildcard
&& other_parts
.is_port_wildcard
)
703 return ContentSettingsPattern::PREDECESSOR
;
705 int result
= parts
.port
.compare(other_parts
.port
);
707 return ContentSettingsPattern::IDENTITY
;
709 return ContentSettingsPattern::DISJOINT_ORDER_PRE
;
710 return ContentSettingsPattern::DISJOINT_ORDER_POST
;