Updating trunk VERSION from 2139.0 to 2140.0
[chromium-blink-merge.git] / chrome / common / content_settings_pattern.cc
blob03a746ee618ce49dcc44ffa2597aec36758b75f8
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/common/content_settings_pattern.h"
7 #include <vector>
9 #include "base/memory/scoped_ptr.h"
10 #include "base/strings/string_split.h"
11 #include "base/strings/string_util.h"
12 #include "chrome/common/content_settings_pattern_parser.h"
13 #include "chrome/common/render_messages.h"
14 #include "chrome/common/url_constants.h"
15 #include "extensions/common/constants.h"
16 #include "ipc/ipc_message_utils.h"
17 #include "net/base/dns_util.h"
18 #include "net/base/net_util.h"
19 #include "url/gurl.h"
20 #include "url/url_canon.h"
22 namespace {
24 std::string GetDefaultPort(const std::string& scheme) {
25 if (scheme == url::kHttpScheme)
26 return "80";
27 if (scheme == url::kHttpsScheme)
28 return "443";
29 return std::string();
// Returns true if |sub_domain| is a sub domain of or equals |domain|. E.g.
// "mail.google.com" is a sub domain of "google.com" but "evilhost.com" is not a
// sub domain of "host.com".
//
// An empty |domain| acts as a wildcard: every |sub_domain| (including the empty
// string) is considered a sub domain of it.
bool IsSubDomainOrEqual(const std::string& sub_domain,
                        const std::string& domain) {
  // The empty string serves as wildcard. Each domain is a subdomain of the
  // wildcard.
  if (domain.empty())
    return true;

  // |domain| can only be a suffix of something at least as long as itself.
  if (sub_domain.length() < domain.length())
    return false;

  // Test the suffix directly instead of searching the whole string for it.
  const size_t suffix_start = sub_domain.length() - domain.length();
  if (sub_domain.compare(suffix_start, domain.length(), domain) != 0)
    return false;

  // Either the two names are equal, or the suffix must begin on a label
  // boundary so that "evilhost.com" does not match "host.com".
  return suffix_start == 0 || sub_domain[suffix_start - 1] == '.';
}
50 // Compares two domain names.
51 int CompareDomainNames(const std::string& str1, const std::string& str2) {
52 std::vector<std::string> domain_name1;
53 std::vector<std::string> domain_name2;
55 base::SplitString(str1, '.', &domain_name1);
56 base::SplitString(str2, '.', &domain_name2);
58 int i1 = domain_name1.size() - 1;
59 int i2 = domain_name2.size() - 1;
60 int rv;
61 while (i1 >= 0 && i2 >= 0) {
62 // domain names are stored in puny code. So it's fine to use the compare
63 // method.
64 rv = domain_name1[i1].compare(domain_name2[i2]);
65 if (rv != 0)
66 return rv;
67 --i1;
68 --i2;
71 if (i1 > i2)
72 return 1;
74 if (i1 < i2)
75 return -1;
77 // The domain names are identical.
78 return 0;
// Shorthand used as the return type of the Builder method definitions in this
// file.
81 typedef ContentSettingsPattern::BuilderInterface BuilderInterface;
83 } // namespace
85 // ////////////////////////////////////////////////////////////////////////////
86 // ContentSettingsPattern::Builder
88 ContentSettingsPattern::Builder::Builder(bool use_legacy_validate)
89 : is_valid_(true),
90 use_legacy_validate_(use_legacy_validate) {}
92 ContentSettingsPattern::Builder::~Builder() {}
94 BuilderInterface* ContentSettingsPattern::Builder::WithPort(
95 const std::string& port) {
96 parts_.port = port;
97 parts_.is_port_wildcard = false;
98 return this;
101 BuilderInterface* ContentSettingsPattern::Builder::WithPortWildcard() {
102 parts_.port = "";
103 parts_.is_port_wildcard = true;
104 return this;
107 BuilderInterface* ContentSettingsPattern::Builder::WithHost(
108 const std::string& host) {
109 parts_.host = host;
110 return this;
113 BuilderInterface* ContentSettingsPattern::Builder::WithDomainWildcard() {
114 parts_.has_domain_wildcard = true;
115 return this;
118 BuilderInterface* ContentSettingsPattern::Builder::WithScheme(
119 const std::string& scheme) {
120 parts_.scheme = scheme;
121 parts_.is_scheme_wildcard = false;
122 return this;
125 BuilderInterface* ContentSettingsPattern::Builder::WithSchemeWildcard() {
126 parts_.scheme = "";
127 parts_.is_scheme_wildcard = true;
128 return this;
131 BuilderInterface* ContentSettingsPattern::Builder::WithPath(
132 const std::string& path) {
133 parts_.path = path;
134 parts_.is_path_wildcard = false;
135 return this;
138 BuilderInterface* ContentSettingsPattern::Builder::WithPathWildcard() {
139 parts_.path = "";
140 parts_.is_path_wildcard = true;
141 return this;
144 BuilderInterface* ContentSettingsPattern::Builder::Invalid() {
145 is_valid_ = false;
146 return this;
149 ContentSettingsPattern ContentSettingsPattern::Builder::Build() {
150 if (!is_valid_)
151 return ContentSettingsPattern();
152 if (!Canonicalize(&parts_))
153 return ContentSettingsPattern();
154 if (use_legacy_validate_) {
155 is_valid_ = LegacyValidate(parts_);
156 } else {
157 is_valid_ = Validate(parts_);
159 if (!is_valid_)
160 return ContentSettingsPattern();
162 // A pattern is invalid if canonicalization is not idempotent.
163 // This check is here because it should be checked no matter
164 // use_legacy_validate_ is.
165 PatternParts parts(parts_);
166 if (!Canonicalize(&parts))
167 return ContentSettingsPattern();
168 if (ContentSettingsPattern(parts_, true) !=
169 ContentSettingsPattern(parts, true)) {
170 return ContentSettingsPattern();
173 return ContentSettingsPattern(parts_, is_valid_);
176 // static
177 bool ContentSettingsPattern::Builder::Canonicalize(PatternParts* parts) {
178 // Canonicalize the scheme part.
179 const std::string scheme(base::StringToLowerASCII(parts->scheme));
180 parts->scheme = scheme;
182 if (parts->scheme == std::string(url::kFileScheme) &&
183 !parts->is_path_wildcard) {
184 GURL url(std::string(url::kFileScheme) +
185 std::string(url::kStandardSchemeSeparator) + parts->path);
186 parts->path = url.path();
189 // Canonicalize the host part.
190 const std::string host(parts->host);
191 url::CanonHostInfo host_info;
192 std::string canonicalized_host(net::CanonicalizeHost(host, &host_info));
193 if (host_info.IsIPAddress() && parts->has_domain_wildcard)
194 return false;
195 canonicalized_host = net::TrimEndingDot(canonicalized_host);
197 parts->host = "";
198 if ((host.find('*') == std::string::npos) &&
199 !canonicalized_host.empty()) {
200 // Valid host.
201 parts->host += canonicalized_host;
203 return true;
206 // static
207 bool ContentSettingsPattern::Builder::Validate(const PatternParts& parts) {
208 // Sanity checks first: {scheme, port} wildcards imply empty {scheme, port}.
209 if ((parts.is_scheme_wildcard && !parts.scheme.empty()) ||
210 (parts.is_port_wildcard && !parts.port.empty())) {
211 NOTREACHED();
212 return false;
215 // file:// URL patterns have an empty host and port.
216 if (parts.scheme == std::string(url::kFileScheme)) {
217 if (parts.has_domain_wildcard || !parts.host.empty() || !parts.port.empty())
218 return false;
219 if (parts.is_path_wildcard)
220 return parts.path.empty();
221 return (!parts.path.empty() &&
222 parts.path != "/" &&
223 parts.path.find("*") == std::string::npos);
226 // If the pattern is for an extension URL test if it is valid.
227 if (parts.scheme == std::string(extensions::kExtensionScheme) &&
228 parts.port.empty() &&
229 !parts.is_port_wildcard) {
230 return true;
233 // Non-file patterns are invalid if either the scheme, host or port part is
234 // empty.
235 if ((parts.scheme.empty() && !parts.is_scheme_wildcard) ||
236 (parts.host.empty() && !parts.has_domain_wildcard) ||
237 (parts.port.empty() && !parts.is_port_wildcard)) {
238 return false;
241 if (parts.host.find("*") != std::string::npos)
242 return false;
244 // Test if the scheme is supported or a wildcard.
245 if (!parts.is_scheme_wildcard &&
246 parts.scheme != std::string(url::kHttpScheme) &&
247 parts.scheme != std::string(url::kHttpsScheme)) {
248 return false;
250 return true;
253 // static
254 bool ContentSettingsPattern::Builder::LegacyValidate(
255 const PatternParts& parts) {
256 // If the pattern is for a "file-pattern" test if it is valid.
257 if (parts.scheme == std::string(url::kFileScheme) &&
258 !parts.is_scheme_wildcard &&
259 parts.host.empty() &&
260 parts.port.empty())
261 return true;
263 // If the pattern is for an extension URL test if it is valid.
264 if (parts.scheme == std::string(extensions::kExtensionScheme) &&
265 !parts.is_scheme_wildcard &&
266 !parts.host.empty() &&
267 !parts.has_domain_wildcard &&
268 parts.port.empty() &&
269 !parts.is_port_wildcard)
270 return true;
272 // Non-file patterns are invalid if either the scheme, host or port part is
273 // empty.
274 if ((!parts.is_scheme_wildcard) ||
275 (parts.host.empty() && !parts.has_domain_wildcard) ||
276 (!parts.is_port_wildcard))
277 return false;
279 // Test if the scheme is supported or a wildcard.
280 if (!parts.is_scheme_wildcard &&
281 parts.scheme != std::string(url::kHttpScheme) &&
282 parts.scheme != std::string(url::kHttpsScheme)) {
283 return false;
285 return true;
288 // ////////////////////////////////////////////////////////////////////////////
289 // ContentSettingsPattern::PatternParts
291 ContentSettingsPattern::PatternParts::PatternParts()
292 : is_scheme_wildcard(false),
293 has_domain_wildcard(false),
294 is_port_wildcard(false),
295 is_path_wildcard(false) {}
297 ContentSettingsPattern::PatternParts::~PatternParts() {}
299 // ////////////////////////////////////////////////////////////////////////////
300 // ContentSettingsPattern
303 // The version of the pattern format implemented. Version 1 includes the
304 // following patterns:
305 // - [*.]domain.tld (matches domain.tld and all sub-domains)
306 // - host (matches an exact hostname)
307 // - a.b.c.d (matches an exact IPv4 ip)
308 // - [a:b:c:d:e:f:g:h] (matches an exact IPv6 ip)
309 // - file:///tmp/test.html (a complete URL without a host)
310 // Version 2 adds a resource identifier for plugins.
311 // TODO(jochen): update once this feature is no longer behind a flag.
312 const int ContentSettingsPattern::kContentSettingsPatternVersion = 1;
314 // TODO(markusheintz): These two constants were moved to the Pattern Parser.
315 // Remove once the dependency of the ContentSettingsBaseProvider is removed.
316 const char* ContentSettingsPattern::kDomainWildcard = "[*.]";
317 const size_t ContentSettingsPattern::kDomainWildcardLength = 4;
319 // static
320 BuilderInterface* ContentSettingsPattern::CreateBuilder(
321 bool validate) {
322 return new Builder(validate);
325 // static
326 ContentSettingsPattern ContentSettingsPattern::FromURL(
327 const GURL& url) {
328 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
329 ContentSettingsPattern::CreateBuilder(false));
331 const GURL* local_url = &url;
332 if (url.SchemeIsFileSystem() && url.inner_url()) {
333 local_url = url.inner_url();
335 if (local_url->SchemeIsFile()) {
336 builder->WithScheme(local_url->scheme())->WithPath(local_url->path());
337 } else {
338 // Please keep the order of the ifs below as URLs with an IP as host can
339 // also have a "http" scheme.
340 if (local_url->HostIsIPAddress()) {
341 builder->WithScheme(local_url->scheme())->WithHost(local_url->host());
342 } else if (local_url->SchemeIs(url::kHttpScheme)) {
343 builder->WithSchemeWildcard()->WithDomainWildcard()->WithHost(
344 local_url->host());
345 } else if (local_url->SchemeIs(url::kHttpsScheme)) {
346 builder->WithScheme(local_url->scheme())->WithDomainWildcard()->WithHost(
347 local_url->host());
348 } else {
349 // Unsupported scheme
351 if (local_url->port().empty()) {
352 if (local_url->SchemeIs(url::kHttpsScheme))
353 builder->WithPort(GetDefaultPort(url::kHttpsScheme));
354 else
355 builder->WithPortWildcard();
356 } else {
357 builder->WithPort(local_url->port());
360 return builder->Build();
363 // static
364 ContentSettingsPattern ContentSettingsPattern::FromURLNoWildcard(
365 const GURL& url) {
366 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
367 ContentSettingsPattern::CreateBuilder(false));
369 const GURL* local_url = &url;
370 if (url.SchemeIsFileSystem() && url.inner_url()) {
371 local_url = url.inner_url();
373 if (local_url->SchemeIsFile()) {
374 builder->WithScheme(local_url->scheme())->WithPath(local_url->path());
375 } else {
376 builder->WithScheme(local_url->scheme())->WithHost(local_url->host());
377 if (local_url->port().empty()) {
378 builder->WithPort(GetDefaultPort(local_url->scheme()));
379 } else {
380 builder->WithPort(local_url->port());
383 return builder->Build();
386 // static
387 ContentSettingsPattern ContentSettingsPattern::FromString(
388 const std::string& pattern_spec) {
389 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
390 ContentSettingsPattern::CreateBuilder(false));
391 content_settings::PatternParser::Parse(pattern_spec, builder.get());
392 return builder->Build();
395 // static
396 ContentSettingsPattern ContentSettingsPattern::LegacyFromString(
397 const std::string& pattern_spec) {
398 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
399 ContentSettingsPattern::CreateBuilder(true));
400 content_settings::PatternParser::Parse(pattern_spec, builder.get());
401 return builder->Build();
404 // static
405 ContentSettingsPattern ContentSettingsPattern::Wildcard() {
406 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
407 ContentSettingsPattern::CreateBuilder(true));
408 builder->WithSchemeWildcard()->WithDomainWildcard()->WithPortWildcard()->
409 WithPathWildcard();
410 return builder->Build();
413 ContentSettingsPattern::ContentSettingsPattern()
414 : is_valid_(false) {
417 ContentSettingsPattern::ContentSettingsPattern(
418 const PatternParts& parts,
419 bool valid)
420 : parts_(parts),
421 is_valid_(valid) {
424 void ContentSettingsPattern::WriteToMessage(IPC::Message* m) const {
425 IPC::WriteParam(m, is_valid_);
426 IPC::WriteParam(m, parts_);
429 bool ContentSettingsPattern::ReadFromMessage(const IPC::Message* m,
430 PickleIterator* iter) {
431 return IPC::ReadParam(m, iter, &is_valid_) &&
432 IPC::ReadParam(m, iter, &parts_);
435 bool ContentSettingsPattern::Matches(
436 const GURL& url) const {
437 // An invalid pattern matches nothing.
438 if (!is_valid_)
439 return false;
441 const GURL* local_url = &url;
442 if (url.SchemeIsFileSystem() && url.inner_url()) {
443 local_url = url.inner_url();
446 // Match the scheme part.
447 const std::string scheme(local_url->scheme());
448 if (!parts_.is_scheme_wildcard &&
449 parts_.scheme != scheme) {
450 return false;
453 // File URLs have no host. Matches if the pattern has the path wildcard set,
454 // or if the path in the URL is identical to the one in the pattern.
455 // For filesystem:file URLs, the path used is the filesystem type, so all
456 // filesystem:file:///temporary/... are equivalent.
457 // TODO(markusheintz): Content settings should be defined for all files on
458 // a machine. Unless there is a good use case for supporting paths for file
459 // patterns, stop supporting path for file patterns.
460 if (!parts_.is_scheme_wildcard && scheme == url::kFileScheme)
461 return parts_.is_path_wildcard ||
462 parts_.path == std::string(local_url->path());
464 // Match the host part.
465 const std::string host(net::TrimEndingDot(local_url->host()));
466 if (!parts_.has_domain_wildcard) {
467 if (parts_.host != host)
468 return false;
469 } else {
470 if (!IsSubDomainOrEqual(host, parts_.host))
471 return false;
474 // For chrome extensions URLs ignore the port.
475 if (parts_.scheme == std::string(extensions::kExtensionScheme))
476 return true;
478 // Match the port part.
479 std::string port(local_url->port());
481 // Use the default port if the port string is empty. GURL returns an empty
482 // string if no port at all was specified or if the default port was
483 // specified.
484 if (port.empty()) {
485 port = GetDefaultPort(scheme);
488 if (!parts_.is_port_wildcard &&
489 parts_.port != port ) {
490 return false;
493 return true;
496 bool ContentSettingsPattern::MatchesAllHosts() const {
497 return parts_.has_domain_wildcard && parts_.host.empty();
500 const std::string ContentSettingsPattern::ToString() const {
501 if (IsValid())
502 return content_settings::PatternParser::ToString(parts_);
503 else
504 return std::string();
507 ContentSettingsPattern::Relation ContentSettingsPattern::Compare(
508 const ContentSettingsPattern& other) const {
509 // Two invalid patterns are identical in the way they behave. They don't match
510 // anything and are represented as an empty string. So it's fair to treat them
511 // as identical.
512 if ((this == &other) ||
513 (!is_valid_ && !other.is_valid_))
514 return IDENTITY;
516 if (!is_valid_ && other.is_valid_)
517 return DISJOINT_ORDER_POST;
518 if (is_valid_ && !other.is_valid_)
519 return DISJOINT_ORDER_PRE;
521 // If either host, port or scheme are disjoint return immediately.
522 Relation host_relation = CompareHost(parts_, other.parts_);
523 if (host_relation == DISJOINT_ORDER_PRE ||
524 host_relation == DISJOINT_ORDER_POST)
525 return host_relation;
527 Relation port_relation = ComparePort(parts_, other.parts_);
528 if (port_relation == DISJOINT_ORDER_PRE ||
529 port_relation == DISJOINT_ORDER_POST)
530 return port_relation;
532 Relation scheme_relation = CompareScheme(parts_, other.parts_);
533 if (scheme_relation == DISJOINT_ORDER_PRE ||
534 scheme_relation == DISJOINT_ORDER_POST)
535 return scheme_relation;
537 if (host_relation != IDENTITY)
538 return host_relation;
539 if (port_relation != IDENTITY)
540 return port_relation;
541 return scheme_relation;
544 bool ContentSettingsPattern::operator==(
545 const ContentSettingsPattern& other) const {
546 return Compare(other) == IDENTITY;
549 bool ContentSettingsPattern::operator!=(
550 const ContentSettingsPattern& other) const {
551 return !(*this == other);
554 bool ContentSettingsPattern::operator<(
555 const ContentSettingsPattern& other) const {
556 return Compare(other) < 0;
559 bool ContentSettingsPattern::operator>(
560 const ContentSettingsPattern& other) const {
561 return Compare(other) > 0;
564 // static
565 ContentSettingsPattern::Relation ContentSettingsPattern::CompareHost(
566 const ContentSettingsPattern::PatternParts& parts,
567 const ContentSettingsPattern::PatternParts& other_parts) {
568 if (!parts.has_domain_wildcard && !other_parts.has_domain_wildcard) {
569 // Case 1: No host starts with a wild card
570 int result = CompareDomainNames(parts.host, other_parts.host);
571 if (result == 0)
572 return ContentSettingsPattern::IDENTITY;
573 if (result < 0)
574 return ContentSettingsPattern::DISJOINT_ORDER_PRE;
575 return ContentSettingsPattern::DISJOINT_ORDER_POST;
576 } else if (parts.has_domain_wildcard && !other_parts.has_domain_wildcard) {
577 // Case 2: |host| starts with a domain wildcard and |other_host| does not
578 // start with a domain wildcard.
579 // Examples:
580 // "this" host: [*.]google.com
581 // "other" host: google.com
583 // [*.]google.com
584 // mail.google.com
586 // [*.]mail.google.com
587 // google.com
589 // [*.]youtube.com
590 // google.de
592 // [*.]youtube.com
593 // mail.google.com
595 // *
596 // google.de
597 if (IsSubDomainOrEqual(other_parts.host, parts.host)) {
598 return ContentSettingsPattern::SUCCESSOR;
599 } else {
600 if (CompareDomainNames(parts.host, other_parts.host) < 0)
601 return ContentSettingsPattern::DISJOINT_ORDER_PRE;
602 return ContentSettingsPattern::DISJOINT_ORDER_POST;
604 } else if (!parts.has_domain_wildcard && other_parts.has_domain_wildcard) {
605 // Case 3: |host| starts NOT with a domain wildcard and |other_host| starts
606 // with a domain wildcard.
607 if (IsSubDomainOrEqual(parts.host, other_parts.host)) {
608 return ContentSettingsPattern::PREDECESSOR;
609 } else {
610 if (CompareDomainNames(parts.host, other_parts.host) < 0)
611 return ContentSettingsPattern::DISJOINT_ORDER_PRE;
612 return ContentSettingsPattern::DISJOINT_ORDER_POST;
614 } else if (parts.has_domain_wildcard && other_parts.has_domain_wildcard) {
615 // Case 4: |host| and |other_host| both start with a domain wildcard.
616 // Examples:
617 // [*.]google.com
618 // [*.]google.com
620 // [*.]google.com
621 // [*.]mail.google.com
623 // [*.]youtube.com
624 // [*.]google.de
626 // [*.]youtube.com
627 // [*.]mail.google.com
629 // [*.]youtube.com
630 // *
632 // *
633 // [*.]youtube.com
634 if (parts.host == other_parts.host) {
635 return ContentSettingsPattern::IDENTITY;
636 } else if (IsSubDomainOrEqual(other_parts.host, parts.host)) {
637 return ContentSettingsPattern::SUCCESSOR;
638 } else if (IsSubDomainOrEqual(parts.host, other_parts.host)) {
639 return ContentSettingsPattern::PREDECESSOR;
640 } else {
641 if (CompareDomainNames(parts.host, other_parts.host) < 0)
642 return ContentSettingsPattern::DISJOINT_ORDER_PRE;
643 return ContentSettingsPattern::DISJOINT_ORDER_POST;
647 NOTREACHED();
648 return ContentSettingsPattern::IDENTITY;
651 // static
652 ContentSettingsPattern::Relation ContentSettingsPattern::CompareScheme(
653 const ContentSettingsPattern::PatternParts& parts,
654 const ContentSettingsPattern::PatternParts& other_parts) {
655 if (parts.is_scheme_wildcard && !other_parts.is_scheme_wildcard)
656 return ContentSettingsPattern::SUCCESSOR;
657 if (!parts.is_scheme_wildcard && other_parts.is_scheme_wildcard)
658 return ContentSettingsPattern::PREDECESSOR;
660 int result = parts.scheme.compare(other_parts.scheme);
661 if (result == 0)
662 return ContentSettingsPattern::IDENTITY;
663 if (result > 0)
664 return ContentSettingsPattern::DISJOINT_ORDER_PRE;
665 return ContentSettingsPattern::DISJOINT_ORDER_POST;
668 // static
669 ContentSettingsPattern::Relation ContentSettingsPattern::ComparePort(
670 const ContentSettingsPattern::PatternParts& parts,
671 const ContentSettingsPattern::PatternParts& other_parts) {
672 if (parts.is_port_wildcard && !other_parts.is_port_wildcard)
673 return ContentSettingsPattern::SUCCESSOR;
674 if (!parts.is_port_wildcard && other_parts.is_port_wildcard)
675 return ContentSettingsPattern::PREDECESSOR;
677 int result = parts.port.compare(other_parts.port);
678 if (result == 0)
679 return ContentSettingsPattern::IDENTITY;
680 if (result > 0)
681 return ContentSettingsPattern::DISJOINT_ORDER_PRE;
682 return ContentSettingsPattern::DISJOINT_ORDER_POST;