1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/common/content_settings_pattern.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "base/string_split.h"
11 #include "base/string_util.h"
12 #include "chrome/common/content_settings_pattern_parser.h"
13 #include "chrome/common/render_messages.h"
14 #include "chrome/common/url_constants.h"
15 #include "extensions/common/constants.h"
16 #include "googleurl/src/gurl.h"
17 #include "googleurl/src/url_canon.h"
18 #include "ipc/ipc_message_utils.h"
19 #include "net/base/dns_util.h"
20 #include "net/base/net_util.h"
// Maps a URL scheme to a port string. This listing shows only the two
// comparisons, against chrome::kHttpScheme and chrome::kHttpsScheme.
// NOTE(review): the value returned on each branch and the fallback for other
// schemes are not part of this listing -- presumably "80", "443" and an empty
// string; confirm against the full file.
24 std::string
GetDefaultPort(const std::string
& scheme
) {
25 if (scheme
== chrome::kHttpScheme
)
27 if (scheme
== chrome::kHttpsScheme
)
// Returns true if |sub_domain| is a sub domain of |domain| or equals it. E.g.
// "mail.google.com" is a sub domain of "google.com", but "evilhost.com" is not
// a sub domain of "host.com".
bool IsSubDomainOrEqual(const std::string& sub_domain,
                        const std::string& domain) {
  // The empty string serves as wildcard. Each domain is a sub domain of the
  // wildcard.
  if (domain.empty())
    return true;

  // |domain| has to be a suffix of |sub_domain| ...
  const size_t sub_length = sub_domain.length();
  const size_t domain_length = domain.length();
  if (sub_length < domain_length)
    return false;
  const size_t suffix_start = sub_length - domain_length;
  if (sub_domain.compare(suffix_start, domain_length, domain) != 0)
    return false;

  // ... and that suffix must either be the whole string or begin right after
  // a label separator, so that "evilhost.com" does not match "host.com".
  return suffix_start == 0 || sub_domain[suffix_start - 1] == '.';
}
50 // Compares two domain names.
// Both names are split on '.' into labels. Comparison starts at the last label
// of each name and uses std::string::compare on one label pair at a time
// while both indices are still non-negative.
// NOTE(review): the statements that act on |rv|, the index updates and the
// code between the loop and the final comment are not part of this listing;
// how the result is derived from |rv|, |i1| and |i2| must be confirmed
// against the full file.
51 int CompareDomainNames(const std::string
& str1
, const std::string
& str2
) {
52 std::vector
<std::string
> domain_name1
;
53 std::vector
<std::string
> domain_name2
;
55 base::SplitString(str1
, '.', &domain_name1
);
56 base::SplitString(str2
, '.', &domain_name2
);
58 int i1
= domain_name1
.size() - 1;
59 int i2
= domain_name2
.size() - 1;
61 while (i1
>= 0 && i2
>= 0) {
62 // domain names are stored in puny code. So it's fine to use the compare
64 rv
= domain_name1
[i1
].compare(domain_name2
[i2
]);
77 // The domain names are identical.
81 typedef ContentSettingsPattern::BuilderInterface BuilderInterface
;
85 // ////////////////////////////////////////////////////////////////////////////
86 // ContentSettingsPattern::Builder
// Constructs a builder. |use_legacy_validate| is stored in
// use_legacy_validate_, which Build() reads to choose between
// LegacyValidate() and Validate().
// NOTE(review): the first entry of the initializer list is not part of this
// listing.
88 ContentSettingsPattern::Builder::Builder(bool use_legacy_validate
)
90 use_legacy_validate_(use_legacy_validate
) {}
// Out-of-line destructor with an empty body.
92 ContentSettingsPattern::Builder::~Builder() {}
// Builder setter for an explicit port: clears parts_.is_port_wildcard.
// NOTE(review): the statement that stores |port| and the return statement are
// not part of this listing. Callers chain these setters, so it presumably
// returns |this| -- confirm.
94 BuilderInterface
* ContentSettingsPattern::Builder::WithPort(
95 const std::string
& port
) {
97 parts_
.is_port_wildcard
= false;
// Builder setter that marks the port as a wildcard
// (parts_.is_port_wildcard = true).
// NOTE(review): any reset of the stored port and the return statement are not
// part of this listing; presumably returns |this| for chaining -- confirm.
101 BuilderInterface
* ContentSettingsPattern::Builder::WithPortWildcard() {
103 parts_
.is_port_wildcard
= true;
// Builder setter taking the host of the pattern.
// NOTE(review): the body is not part of this listing; presumably stores
// |host| in parts_.host and returns |this| for chaining -- confirm.
107 BuilderInterface
* ContentSettingsPattern::Builder::WithHost(
108 const std::string
& host
) {
// Builder setter that turns on the domain wildcard
// (parts_.has_domain_wildcard = true).
// NOTE(review): the return statement is not part of this listing; callers
// chain these setters, so it presumably returns |this| -- confirm.
113 BuilderInterface
* ContentSettingsPattern::Builder::WithDomainWildcard() {
114 parts_
.has_domain_wildcard
= true;
// Builder setter for an explicit scheme: stores |scheme| in parts_.scheme and
// clears parts_.is_scheme_wildcard.
// NOTE(review): the return statement is not part of this listing; callers
// chain these setters, so it presumably returns |this| -- confirm.
118 BuilderInterface
* ContentSettingsPattern::Builder::WithScheme(
119 const std::string
& scheme
) {
120 parts_
.scheme
= scheme
;
121 parts_
.is_scheme_wildcard
= false;
// Builder setter that marks the scheme as a wildcard
// (parts_.is_scheme_wildcard = true).
// NOTE(review): any reset of the stored scheme and the return statement are
// not part of this listing; presumably returns |this| for chaining -- confirm.
125 BuilderInterface
* ContentSettingsPattern::Builder::WithSchemeWildcard() {
127 parts_
.is_scheme_wildcard
= true;
// Builder setter for an explicit path: clears parts_.is_path_wildcard.
// NOTE(review): the statement that stores |path| and the return statement are
// not part of this listing; presumably returns |this| for chaining -- confirm.
131 BuilderInterface
* ContentSettingsPattern::Builder::WithPath(
132 const std::string
& path
) {
134 parts_
.is_path_wildcard
= false;
// Builder setter that marks the path as a wildcard
// (parts_.is_path_wildcard = true).
// NOTE(review): any reset of the stored path and the return statement are not
// part of this listing; presumably returns |this| for chaining -- confirm.
138 BuilderInterface
* ContentSettingsPattern::Builder::WithPathWildcard() {
140 parts_
.is_path_wildcard
= true;
// Builder hook used to flag the pattern under construction as invalid.
// NOTE(review): the body is not part of this listing; presumably it clears
// is_valid_ and returns |this| -- confirm.
144 BuilderInterface
* ContentSettingsPattern::Builder::Invalid() {
// Produces the ContentSettingsPattern described by the accumulated parts_.
// A default-constructed pattern is returned when Canonicalize() fails.
// Otherwise is_valid_ is recomputed with LegacyValidate() or Validate(),
// selected by use_legacy_validate_, and the pattern is built from parts_ and
// is_valid_.
// NOTE(review): the condition guarding the first early return is not part of
// this listing -- presumably a check of is_valid_; confirm.
149 ContentSettingsPattern
ContentSettingsPattern::Builder::Build() {
151 return ContentSettingsPattern();
152 if (!Canonicalize(&parts_
))
153 return ContentSettingsPattern();
154 if (use_legacy_validate_
) {
155 is_valid_
= LegacyValidate(parts_
);
157 is_valid_
= Validate(parts_
);
159 return ContentSettingsPattern(parts_
, is_valid_
);
// Normalizes |parts| in place. Steps visible in this listing:
//  - the scheme is replaced by its lower-case ASCII form;
//  - for file-scheme patterns without a path wildcard, the path is round-
//    tripped through GURL and replaced by url.path();
//  - the host is run through net::CanonicalizeHost() and
//    net::TrimEndingDot();
//  - when the original host contains no '*' and the canonical host is
//    non-empty, the canonical host is appended to parts->host.
// NOTE(review): the action taken when an IP-address host is combined with a
// domain wildcard, whether parts->host is cleared before the append, and the
// return statements are not part of this listing -- confirm against the full
// file.
163 bool ContentSettingsPattern::Builder::Canonicalize(PatternParts
* parts
) {
164 // Canonicalize the scheme part.
165 const std::string
scheme(StringToLowerASCII(parts
->scheme
));
166 parts
->scheme
= scheme
;
168 if (parts
->scheme
== std::string(chrome::kFileScheme
) &&
169 !parts
->is_path_wildcard
) {
170 GURL
url(std::string(chrome::kFileScheme
) +
171 std::string(content::kStandardSchemeSeparator
) + parts
->path
);
172 parts
->path
= url
.path();
175 // Canonicalize the host part.
176 const std::string
host(parts
->host
);
177 url_canon::CanonHostInfo host_info
;
178 std::string
canonicalized_host(net::CanonicalizeHost(host
, &host_info
));
179 if (host_info
.IsIPAddress() && parts
->has_domain_wildcard
)
181 canonicalized_host
= net::TrimEndingDot(canonicalized_host
);
184 if ((host
.find('*') == std::string::npos
) &&
185 !canonicalized_host
.empty()) {
187 parts
->host
+= canonicalized_host
;
// Checks whether |parts| describes a well-formed pattern. Conditions tested,
// in order, as far as this listing shows:
//  1. a scheme or port wildcard combined with a non-empty scheme or port;
//  2. file-scheme patterns: a domain wildcard, host or port being set; with a
//     path wildcard the result is parts.path.empty(), otherwise the path must
//     be non-empty and must not contain "*";
//  3. extension-scheme patterns with an empty port and no port wildcard;
//  4. a missing scheme, host or port without the matching wildcard;
//  5. a host that still contains "*";
//  6. a scheme that is neither a wildcard nor http nor https.
// NOTE(review): the return statements for conditions 1 and 3-6, and one
// operand of the file-path test, are not part of this listing; the outcome of
// each branch must be confirmed against the full file.
193 bool ContentSettingsPattern::Builder::Validate(const PatternParts
& parts
) {
194 // Sanity checks first: {scheme, port} wildcards imply empty {scheme, port}.
195 if ((parts
.is_scheme_wildcard
&& !parts
.scheme
.empty()) ||
196 (parts
.is_port_wildcard
&& !parts
.port
.empty())) {
201 // file:// URL patterns have an empty host and port.
202 if (parts
.scheme
== std::string(chrome::kFileScheme
)) {
203 if (parts
.has_domain_wildcard
|| !parts
.host
.empty() || !parts
.port
.empty())
205 if (parts
.is_path_wildcard
)
206 return parts
.path
.empty();
207 return (!parts
.path
.empty() &&
209 parts
.path
.find("*") == std::string::npos
);
212 // If the pattern is for an extension URL test if it is valid.
213 if (parts
.scheme
== std::string(extensions::kExtensionScheme
) &&
214 parts
.port
.empty() &&
215 !parts
.is_port_wildcard
) {
219 // Non-file patterns are invalid if either the scheme, host or port part is
221 if ((parts
.scheme
.empty() && !parts
.is_scheme_wildcard
) ||
222 (parts
.host
.empty() && !parts
.has_domain_wildcard
) ||
223 (parts
.port
.empty() && !parts
.is_port_wildcard
)) {
227 if (parts
.host
.find("*") != std::string::npos
)
230 // Test if the scheme is supported or a wildcard.
231 if (!parts
.is_scheme_wildcard
&&
232 parts
.scheme
!= std::string(chrome::kHttpScheme
) &&
233 parts
.scheme
!= std::string(chrome::kHttpsScheme
)) {
// Legacy counterpart of Validate(); Build() selects it when
// use_legacy_validate_ is set. Conditions tested, in order, as far as this
// listing shows:
//  1. file-scheme patterns with no scheme wildcard and an empty host;
//  2. extension-scheme patterns with a concrete host, no wildcards and an
//     empty port;
//  3. a missing scheme, host or port wildcard;
//  4. a scheme that is neither a wildcard nor http nor https.
// NOTE(review): several operands of conditions 1 and 3 and every return
// statement are not part of this listing; the outcome of each branch must be
// confirmed against the full file.
240 bool ContentSettingsPattern::Builder::LegacyValidate(
241 const PatternParts
& parts
) {
242 // If the pattern is for a "file-pattern" test if it is valid.
243 if (parts
.scheme
== std::string(chrome::kFileScheme
) &&
244 !parts
.is_scheme_wildcard
&&
245 parts
.host
.empty() &&
249 // If the pattern is for an extension URL test if it is valid.
250 if (parts
.scheme
== std::string(extensions::kExtensionScheme
) &&
251 !parts
.is_scheme_wildcard
&&
252 !parts
.host
.empty() &&
253 !parts
.has_domain_wildcard
&&
254 parts
.port
.empty() &&
255 !parts
.is_port_wildcard
)
258 // Non-file patterns are invalid if either the scheme, host or port part is
260 if ((!parts
.is_scheme_wildcard
) ||
261 (parts
.host
.empty() && !parts
.has_domain_wildcard
) ||
262 (!parts
.is_port_wildcard
))
265 // Test if the scheme is supported or a wildcard.
266 if (!parts
.is_scheme_wildcard
&&
267 parts
.scheme
!= std::string(chrome::kHttpScheme
) &&
268 parts
.scheme
!= std::string(chrome::kHttpsScheme
)) {
274 // ////////////////////////////////////////////////////////////////////////////
275 // ContentSettingsPattern::PatternParts
277 ContentSettingsPattern::PatternParts::PatternParts()
278 : is_scheme_wildcard(false),
279 has_domain_wildcard(false),
280 is_port_wildcard(false),
281 is_path_wildcard(false) {}
// Out-of-line destructor with an empty body.
283 ContentSettingsPattern::PatternParts::~PatternParts() {}
285 // ////////////////////////////////////////////////////////////////////////////
286 // ContentSettingsPattern
289 // The version of the pattern format implemented. Version 1 includes the
290 // following patterns:
291 // - [*.]domain.tld (matches domain.tld and all sub-domains)
292 // - host (matches an exact hostname)
293 // - a.b.c.d (matches an exact IPv4 ip)
294 // - [a:b:c:d:e:f:g:h] (matches an exact IPv6 ip)
295 // - file:///tmp/test.html (a complete URL without a host)
296 // Version 2 adds a resource identifier for plugins.
297 // TODO(jochen): update once this feature is no longer behind a flag.
298 const int ContentSettingsPattern::kContentSettingsPatternVersion
= 1;
300 // TODO(markusheintz): These two constants were moved to the Pattern Parser.
301 // Remove once the dependency of the ContentSettingsBaseProvider is removed.
302 const char* ContentSettingsPattern::kDomainWildcard
= "[*.]";
303 const size_t ContentSettingsPattern::kDomainWildcardLength
= 4;
// Allocates a new Builder with operator new and hands it back as a
// BuilderInterface*. |validate| is forwarded to the Builder constructor, where
// it becomes use_legacy_validate_. The callers in this file wrap the result
// in a scoped_ptr.
// NOTE(review): the parameter declaration is not part of this listing.
306 BuilderInterface
* ContentSettingsPattern::CreateBuilder(
308 return new Builder(validate
);
// Builds a pattern from |url| using a builder created with
// CreateBuilder(false). For filesystem: URLs that have an inner URL, the inner
// URL is used instead. Visible rules:
//  - file URLs: scheme plus path;
//  - IP-address hosts: scheme plus host;
//  - http: scheme wildcard, domain wildcard and a host;
//  - https: explicit scheme, domain wildcard and a host;
//  - port: with an empty URL port, https gets GetDefaultPort(https) and a
//    port wildcard is also set on this path; a non-empty URL port is used
//    as is.
// NOTE(review): the parameter list, the host arguments of the http/https
// branches, the handling of unsupported schemes and the `else` keywords that
// join these branches are not part of this listing -- confirm against the
// full file.
312 ContentSettingsPattern
ContentSettingsPattern::FromURL(
314 scoped_ptr
<ContentSettingsPattern::BuilderInterface
> builder(
315 ContentSettingsPattern::CreateBuilder(false));
317 const GURL
* local_url
= &url
;
318 if (url
.SchemeIsFileSystem() && url
.inner_url()) {
319 local_url
= url
.inner_url();
321 if (local_url
->SchemeIsFile()) {
322 builder
->WithScheme(local_url
->scheme())->WithPath(local_url
->path());
324 // Please keep the order of the ifs below as URLs with an IP as host can
325 // also have a "http" scheme.
326 if (local_url
->HostIsIPAddress()) {
327 builder
->WithScheme(local_url
->scheme())->WithHost(local_url
->host());
328 } else if (local_url
->SchemeIs(chrome::kHttpScheme
)) {
329 builder
->WithSchemeWildcard()->WithDomainWildcard()->WithHost(
331 } else if (local_url
->SchemeIs(chrome::kHttpsScheme
)) {
332 builder
->WithScheme(local_url
->scheme())->WithDomainWildcard()->WithHost(
335 // Unsupported scheme
337 if (local_url
->port().empty()) {
338 if (local_url
->SchemeIs(chrome::kHttpsScheme
))
339 builder
->WithPort(GetDefaultPort(chrome::kHttpsScheme
));
341 builder
->WithPortWildcard();
343 builder
->WithPort(local_url
->port());
346 return builder
->Build();
// Builds a pattern from |url| without any wildcard setters, using a builder
// created with CreateBuilder(false). For filesystem: URLs that have an inner
// URL, the inner URL is used instead. File URLs contribute scheme and path.
// The other visible path sets scheme and host, then the port: the URL's own
// port when present, GetDefaultPort(scheme) when it is empty.
// NOTE(review): the parameter list and the `else` keywords joining the
// branches are not part of this listing -- confirm against the full file.
350 ContentSettingsPattern
ContentSettingsPattern::FromURLNoWildcard(
352 scoped_ptr
<ContentSettingsPattern::BuilderInterface
> builder(
353 ContentSettingsPattern::CreateBuilder(false));
355 const GURL
* local_url
= &url
;
356 if (url
.SchemeIsFileSystem() && url
.inner_url()) {
357 local_url
= url
.inner_url();
359 if (local_url
->SchemeIsFile()) {
360 builder
->WithScheme(local_url
->scheme())->WithPath(local_url
->path());
362 builder
->WithScheme(local_url
->scheme())->WithHost(local_url
->host());
363 if (local_url
->port().empty()) {
364 builder
->WithPort(GetDefaultPort(local_url
->scheme()));
366 builder
->WithPort(local_url
->port());
369 return builder
->Build();
373 ContentSettingsPattern
ContentSettingsPattern::FromString(
374 const std::string
& pattern_spec
) {
375 scoped_ptr
<ContentSettingsPattern::BuilderInterface
> builder(
376 ContentSettingsPattern::CreateBuilder(false));
377 content_settings::PatternParser::Parse(pattern_spec
, builder
.get());
378 return builder
->Build();
382 ContentSettingsPattern
ContentSettingsPattern::LegacyFromString(
383 const std::string
& pattern_spec
) {
384 scoped_ptr
<ContentSettingsPattern::BuilderInterface
> builder(
385 ContentSettingsPattern::CreateBuilder(true));
386 content_settings::PatternParser::Parse(pattern_spec
, builder
.get());
387 return builder
->Build();
// Builds a pattern with the scheme, domain and port wildcards set, using a
// builder created with CreateBuilder(true).
// NOTE(review): the final call of the setter chain is not part of this
// listing -- presumably WithPathWildcard(); confirm.
391 ContentSettingsPattern
ContentSettingsPattern::Wildcard() {
392 scoped_ptr
<ContentSettingsPattern::BuilderInterface
> builder(
393 ContentSettingsPattern::CreateBuilder(true));
394 builder
->WithSchemeWildcard()->WithDomainWildcard()->WithPortWildcard()->
396 return builder
->Build();
399 ContentSettingsPattern::ContentSettingsPattern()
403 ContentSettingsPattern::ContentSettingsPattern(
404 const PatternParts
& parts
,
410 void ContentSettingsPattern::WriteToMessage(IPC::Message
* m
) const {
411 IPC::WriteParam(m
, is_valid_
);
412 IPC::WriteParam(m
, parts_
);
415 bool ContentSettingsPattern::ReadFromMessage(const IPC::Message
* m
,
416 PickleIterator
* iter
) {
417 return IPC::ReadParam(m
, iter
, &is_valid_
) &&
418 IPC::ReadParam(m
, iter
, &parts_
);
// Tests whether |url| is covered by this pattern. For filesystem: URLs that
// have an inner URL, the inner URL is matched instead. Checks visible in this
// listing, in order:
//  - scheme: must equal parts_.scheme unless the scheme is a wildcard;
//  - file scheme (non-wildcard): the result is the path wildcard flag, or
//    equality of parts_.path and the URL path;
//  - host: compared after net::TrimEndingDot(); exact comparison without a
//    domain wildcard, IsSubDomainOrEqual() otherwise;
//  - extension scheme: tested before the port is looked at;
//  - port: the URL port, or GetDefaultPort(scheme) on a path whose condition
//    is not shown, must equal parts_.port unless the port is a wildcard.
// NOTE(review): the validity check at the top, the `else` joining the two
// host tests and every plain return statement are not part of this listing;
// the outcome of each branch must be confirmed against the full file.
421 bool ContentSettingsPattern::Matches(
422 const GURL
& url
) const {
423 // An invalid pattern matches nothing.
427 const GURL
* local_url
= &url
;
428 if (url
.SchemeIsFileSystem() && url
.inner_url()) {
429 local_url
= url
.inner_url();
432 // Match the scheme part.
433 const std::string
scheme(local_url
->scheme());
434 if (!parts_
.is_scheme_wildcard
&&
435 parts_
.scheme
!= scheme
) {
439 // File URLs have no host. Matches if the pattern has the path wildcard set,
440 // or if the path in the URL is identical to the one in the pattern.
441 // For filesystem:file URLs, the path used is the filesystem type, so all
442 // filesystem:file:///temporary/... are equivalent.
443 // TODO(markusheintz): Content settings should be defined for all files on
444 // a machine. Unless there is a good use case for supporting paths for file
445 // patterns, stop supporting path for file patterns.
446 if (!parts_
.is_scheme_wildcard
&& scheme
== chrome::kFileScheme
)
447 return parts_
.is_path_wildcard
||
448 parts_
.path
== std::string(local_url
->path());
450 // Match the host part.
451 const std::string
host(net::TrimEndingDot(local_url
->host()));
452 if (!parts_
.has_domain_wildcard
) {
453 if (parts_
.host
!= host
)
456 if (!IsSubDomainOrEqual(host
, parts_
.host
))
460 // For chrome extensions URLs ignore the port.
461 if (parts_
.scheme
== std::string(extensions::kExtensionScheme
))
464 // Match the port part.
465 std::string
port(local_url
->port());
467 // Use the default port if the port string is empty. GURL returns an empty
468 // string if no port at all was specified or if the default port was
471 port
= GetDefaultPort(scheme
);
474 if (!parts_
.is_port_wildcard
&&
475 parts_
.port
!= port
) {
482 bool ContentSettingsPattern::MatchesAllHosts() const {
483 return parts_
.has_domain_wildcard
&& parts_
.host
.empty();
// Renders the pattern as a string by delegating to
// content_settings::PatternParser::ToString(parts_).
// NOTE(review): the condition guarding this return and any alternative return
// are not part of this listing. A comment in Compare() says invalid patterns
// are represented as an empty string, so presumably this is the valid-pattern
// path -- confirm.
486 const std::string
ContentSettingsPattern::ToString() const {
488 return content_settings::PatternParser::ToString(parts_
);
493 ContentSettingsPattern::Relation
ContentSettingsPattern::Compare(
494 const ContentSettingsPattern
& other
) const {
495 // Two invalid patterns are identical in the way they behave. They don't match
496 // anything and are represented as an empty string. So it's fair to treat them
498 if ((this == &other
) ||
499 (!is_valid_
&& !other
.is_valid_
))
502 if (!is_valid_
&& other
.is_valid_
)
503 return DISJOINT_ORDER_POST
;
504 if (is_valid_
&& !other
.is_valid_
)
505 return DISJOINT_ORDER_PRE
;
507 // If either host, port or scheme are disjoint return immediately.
508 Relation host_relation
= CompareHost(parts_
, other
.parts_
);
509 if (host_relation
== DISJOINT_ORDER_PRE
||
510 host_relation
== DISJOINT_ORDER_POST
)
511 return host_relation
;
513 Relation port_relation
= ComparePort(parts_
, other
.parts_
);
514 if (port_relation
== DISJOINT_ORDER_PRE
||
515 port_relation
== DISJOINT_ORDER_POST
)
516 return port_relation
;
518 Relation scheme_relation
= CompareScheme(parts_
, other
.parts_
);
519 if (scheme_relation
== DISJOINT_ORDER_PRE
||
520 scheme_relation
== DISJOINT_ORDER_POST
)
521 return scheme_relation
;
523 if (host_relation
!= IDENTITY
)
524 return host_relation
;
525 if (port_relation
!= IDENTITY
)
526 return port_relation
;
527 return scheme_relation
;
530 bool ContentSettingsPattern::operator==(
531 const ContentSettingsPattern
& other
) const {
532 return Compare(other
) == IDENTITY
;
535 bool ContentSettingsPattern::operator!=(
536 const ContentSettingsPattern
& other
) const {
537 return !(*this == other
);
540 bool ContentSettingsPattern::operator<(
541 const ContentSettingsPattern
& other
) const {
542 return Compare(other
) < 0;
545 bool ContentSettingsPattern::operator>(
546 const ContentSettingsPattern
& other
) const {
547 return Compare(other
) > 0;
// Relates the host part of |parts| to the host part of |other_parts|. The
// decision is split by which side carries a domain wildcard:
//  Case 1 (neither): the result of CompareDomainNames() is mapped to
//    IDENTITY, DISJOINT_ORDER_PRE or DISJOINT_ORDER_POST.
//  Case 2 (only |parts|): SUCCESSOR when the other host is a sub domain of or
//    equal to this host.
//  Case 3 (only |other_parts|): PREDECESSOR when this host is a sub domain of
//    or equal to the other host.
//  Case 4 (both): IDENTITY for equal hosts, SUCCESSOR when the other host is
//    a sub domain of this one, PREDECESSOR for the reverse.
// In cases 2-4 the visible fallback returns DISJOINT_ORDER_PRE when
// CompareDomainNames() is negative and DISJOINT_ORDER_POST otherwise.
// NOTE(review): the conditions that map |result| in case 1, the block
// structure around the fallbacks and most of the explanatory comments are not
// part of this listing -- confirm against the full file.
551 ContentSettingsPattern::Relation
ContentSettingsPattern::CompareHost(
552 const ContentSettingsPattern::PatternParts
& parts
,
553 const ContentSettingsPattern::PatternParts
& other_parts
) {
554 if (!parts
.has_domain_wildcard
&& !other_parts
.has_domain_wildcard
) {
555 // Case 1: No host starts with a wild card
556 int result
= CompareDomainNames(parts
.host
, other_parts
.host
);
558 return ContentSettingsPattern::IDENTITY
;
560 return ContentSettingsPattern::DISJOINT_ORDER_PRE
;
561 return ContentSettingsPattern::DISJOINT_ORDER_POST
;
562 } else if (parts
.has_domain_wildcard
&& !other_parts
.has_domain_wildcard
) {
563 // Case 2: |host| starts with a domain wildcard and |other_host| does not
564 // start with a domain wildcard.
566 // "this" host: [*.]google.com
567 // "other" host: google.com
572 // [*.]mail.google.com
583 if (IsSubDomainOrEqual(other_parts
.host
, parts
.host
)) {
584 return ContentSettingsPattern::SUCCESSOR
;
586 if (CompareDomainNames(parts
.host
, other_parts
.host
) < 0)
587 return ContentSettingsPattern::DISJOINT_ORDER_PRE
;
588 return ContentSettingsPattern::DISJOINT_ORDER_POST
;
590 } else if (!parts
.has_domain_wildcard
&& other_parts
.has_domain_wildcard
) {
591 // Case 3: |host| starts NOT with a domain wildcard and |other_host| starts
592 // with a domain wildcard.
593 if (IsSubDomainOrEqual(parts
.host
, other_parts
.host
)) {
594 return ContentSettingsPattern::PREDECESSOR
;
596 if (CompareDomainNames(parts
.host
, other_parts
.host
) < 0)
597 return ContentSettingsPattern::DISJOINT_ORDER_PRE
;
598 return ContentSettingsPattern::DISJOINT_ORDER_POST
;
600 } else if (parts
.has_domain_wildcard
&& other_parts
.has_domain_wildcard
) {
601 // Case 4: |host| and |other_host| both start with a domain wildcard.
607 // [*.]mail.google.com
613 // [*.]mail.google.com
620 if (parts
.host
== other_parts
.host
) {
621 return ContentSettingsPattern::IDENTITY
;
622 } else if (IsSubDomainOrEqual(other_parts
.host
, parts
.host
)) {
623 return ContentSettingsPattern::SUCCESSOR
;
624 } else if (IsSubDomainOrEqual(parts
.host
, other_parts
.host
)) {
625 return ContentSettingsPattern::PREDECESSOR
;
627 if (CompareDomainNames(parts
.host
, other_parts
.host
) < 0)
628 return ContentSettingsPattern::DISJOINT_ORDER_PRE
;
629 return ContentSettingsPattern::DISJOINT_ORDER_POST
;
634 return ContentSettingsPattern::IDENTITY
;
// Relates the scheme part of |parts| to that of |other_parts|. A scheme
// wildcard on this side only yields SUCCESSOR; a wildcard on the other side
// only yields PREDECESSOR. Otherwise the two scheme strings are compared with
// std::string::compare and the result is mapped to IDENTITY,
// DISJOINT_ORDER_PRE or DISJOINT_ORDER_POST.
// NOTE(review): the conditions on |result| that select between the last three
// returns are not part of this listing -- confirm which sign maps to which
// relation against the full file.
638 ContentSettingsPattern::Relation
ContentSettingsPattern::CompareScheme(
639 const ContentSettingsPattern::PatternParts
& parts
,
640 const ContentSettingsPattern::PatternParts
& other_parts
) {
641 if (parts
.is_scheme_wildcard
&& !other_parts
.is_scheme_wildcard
)
642 return ContentSettingsPattern::SUCCESSOR
;
643 if (!parts
.is_scheme_wildcard
&& other_parts
.is_scheme_wildcard
)
644 return ContentSettingsPattern::PREDECESSOR
;
646 int result
= parts
.scheme
.compare(other_parts
.scheme
);
648 return ContentSettingsPattern::IDENTITY
;
650 return ContentSettingsPattern::DISJOINT_ORDER_PRE
;
651 return ContentSettingsPattern::DISJOINT_ORDER_POST
;
// Relates the port part of |parts| to that of |other_parts|. A port wildcard
// on this side only yields SUCCESSOR; a wildcard on the other side only
// yields PREDECESSOR. Otherwise the two port strings are compared with
// std::string::compare (a string comparison, not a numeric one) and the
// result is mapped to IDENTITY, DISJOINT_ORDER_PRE or DISJOINT_ORDER_POST.
// NOTE(review): the conditions on |result| that select between the last three
// returns are not part of this listing -- confirm which sign maps to which
// relation against the full file.
655 ContentSettingsPattern::Relation
ContentSettingsPattern::ComparePort(
656 const ContentSettingsPattern::PatternParts
& parts
,
657 const ContentSettingsPattern::PatternParts
& other_parts
) {
658 if (parts
.is_port_wildcard
&& !other_parts
.is_port_wildcard
)
659 return ContentSettingsPattern::SUCCESSOR
;
660 if (!parts
.is_port_wildcard
&& other_parts
.is_port_wildcard
)
661 return ContentSettingsPattern::PREDECESSOR
;
663 int result
= parts
.port
.compare(other_parts
.port
);
665 return ContentSettingsPattern::IDENTITY
;
667 return ContentSettingsPattern::DISJOINT_ORDER_PRE
;
668 return ContentSettingsPattern::DISJOINT_ORDER_POST
;