By moving the call to Load() up in SearchProvider::Start(), we are giving a chance...
[chromium-blink-merge.git] / chrome / common / content_settings_pattern.cc
blob4420187ca293f8962e35312caf4c2fd1942d9def
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/common/content_settings_pattern.h"
7 #include <vector>
9 #include "base/memory/scoped_ptr.h"
10 #include "base/string_split.h"
11 #include "base/string_util.h"
12 #include "chrome/common/content_settings_pattern_parser.h"
13 #include "chrome/common/render_messages.h"
14 #include "chrome/common/url_constants.h"
15 #include "extensions/common/constants.h"
16 #include "googleurl/src/gurl.h"
17 #include "googleurl/src/url_canon.h"
18 #include "ipc/ipc_message_utils.h"
19 #include "net/base/dns_util.h"
20 #include "net/base/net_util.h"
22 namespace {
24 std::string GetDefaultPort(const std::string& scheme) {
25 if (scheme == chrome::kHttpScheme)
26 return "80";
27 if (scheme == chrome::kHttpsScheme)
28 return "443";
29 return "";
// Returns true if |sub_domain| equals |domain| or is a sub domain of it.
// E.g. "mail.google.com" is a sub domain of "google.com", but "evilhost.com"
// is not a sub domain of "host.com" because the match does not start at a
// label boundary.
bool IsSubDomainOrEqual(const std::string& sub_domain,
                        const std::string& domain) {
  // The empty string serves as wildcard: every domain is a sub domain of it.
  if (domain.empty())
    return true;

  // |domain| has to be a suffix of |sub_domain|.
  if (sub_domain.length() < domain.length())
    return false;
  const size_t suffix_start = sub_domain.length() - domain.length();
  if (sub_domain.compare(suffix_start, domain.length(), domain) != 0)
    return false;

  // The suffix must either be the whole string or be preceded by a dot.
  return suffix_start == 0 || sub_domain[suffix_start - 1] == '.';
}
50 // Compares two domain names.
51 int CompareDomainNames(const std::string& str1, const std::string& str2) {
52 std::vector<std::string> domain_name1;
53 std::vector<std::string> domain_name2;
55 base::SplitString(str1, '.', &domain_name1);
56 base::SplitString(str2, '.', &domain_name2);
58 int i1 = domain_name1.size() - 1;
59 int i2 = domain_name2.size() - 1;
60 int rv;
61 while (i1 >= 0 && i2 >= 0) {
62 // domain names are stored in puny code. So it's fine to use the compare
63 // method.
64 rv = domain_name1[i1].compare(domain_name2[i2]);
65 if (rv != 0)
66 return rv;
67 --i1;
68 --i2;
71 if (i1 > i2)
72 return 1;
74 if (i1 < i2)
75 return -1;
77 // The domain names are identical.
78 return 0;
81 typedef ContentSettingsPattern::BuilderInterface BuilderInterface;
83 } // namespace
85 // ////////////////////////////////////////////////////////////////////////////
86 // ContentSettingsPattern::Builder
88 ContentSettingsPattern::Builder::Builder(bool use_legacy_validate)
89 : is_valid_(true),
90 use_legacy_validate_(use_legacy_validate) {}
92 ContentSettingsPattern::Builder::~Builder() {}
94 BuilderInterface* ContentSettingsPattern::Builder::WithPort(
95 const std::string& port) {
96 parts_.port = port;
97 parts_.is_port_wildcard = false;
98 return this;
101 BuilderInterface* ContentSettingsPattern::Builder::WithPortWildcard() {
102 parts_.port = "";
103 parts_.is_port_wildcard = true;
104 return this;
107 BuilderInterface* ContentSettingsPattern::Builder::WithHost(
108 const std::string& host) {
109 parts_.host = host;
110 return this;
113 BuilderInterface* ContentSettingsPattern::Builder::WithDomainWildcard() {
114 parts_.has_domain_wildcard = true;
115 return this;
118 BuilderInterface* ContentSettingsPattern::Builder::WithScheme(
119 const std::string& scheme) {
120 parts_.scheme = scheme;
121 parts_.is_scheme_wildcard = false;
122 return this;
125 BuilderInterface* ContentSettingsPattern::Builder::WithSchemeWildcard() {
126 parts_.scheme = "";
127 parts_.is_scheme_wildcard = true;
128 return this;
131 BuilderInterface* ContentSettingsPattern::Builder::WithPath(
132 const std::string& path) {
133 parts_.path = path;
134 parts_.is_path_wildcard = false;
135 return this;
138 BuilderInterface* ContentSettingsPattern::Builder::WithPathWildcard() {
139 parts_.path = "";
140 parts_.is_path_wildcard = true;
141 return this;
144 BuilderInterface* ContentSettingsPattern::Builder::Invalid() {
145 is_valid_ = false;
146 return this;
149 ContentSettingsPattern ContentSettingsPattern::Builder::Build() {
150 if (!is_valid_)
151 return ContentSettingsPattern();
152 if (!Canonicalize(&parts_))
153 return ContentSettingsPattern();
154 if (use_legacy_validate_) {
155 is_valid_ = LegacyValidate(parts_);
156 } else {
157 is_valid_ = Validate(parts_);
159 return ContentSettingsPattern(parts_, is_valid_);
162 // static
163 bool ContentSettingsPattern::Builder::Canonicalize(PatternParts* parts) {
164 // Canonicalize the scheme part.
165 const std::string scheme(StringToLowerASCII(parts->scheme));
166 parts->scheme = scheme;
168 if (parts->scheme == std::string(chrome::kFileScheme) &&
169 !parts->is_path_wildcard) {
170 GURL url(std::string(chrome::kFileScheme) +
171 std::string(content::kStandardSchemeSeparator) + parts->path);
172 parts->path = url.path();
175 // Canonicalize the host part.
176 const std::string host(parts->host);
177 url_canon::CanonHostInfo host_info;
178 std::string canonicalized_host(net::CanonicalizeHost(host, &host_info));
179 if (host_info.IsIPAddress() && parts->has_domain_wildcard)
180 return false;
181 canonicalized_host = net::TrimEndingDot(canonicalized_host);
183 parts->host = "";
184 if ((host.find('*') == std::string::npos) &&
185 !canonicalized_host.empty()) {
186 // Valid host.
187 parts->host += canonicalized_host;
189 return true;
192 // static
193 bool ContentSettingsPattern::Builder::Validate(const PatternParts& parts) {
194 // Sanity checks first: {scheme, port} wildcards imply empty {scheme, port}.
195 if ((parts.is_scheme_wildcard && !parts.scheme.empty()) ||
196 (parts.is_port_wildcard && !parts.port.empty())) {
197 NOTREACHED();
198 return false;
201 // file:// URL patterns have an empty host and port.
202 if (parts.scheme == std::string(chrome::kFileScheme)) {
203 if (parts.has_domain_wildcard || !parts.host.empty() || !parts.port.empty())
204 return false;
205 if (parts.is_path_wildcard)
206 return parts.path.empty();
207 return (!parts.path.empty() &&
208 parts.path != "/" &&
209 parts.path.find("*") == std::string::npos);
212 // If the pattern is for an extension URL test if it is valid.
213 if (parts.scheme == std::string(extensions::kExtensionScheme) &&
214 parts.port.empty() &&
215 !parts.is_port_wildcard) {
216 return true;
219 // Non-file patterns are invalid if either the scheme, host or port part is
220 // empty.
221 if ((parts.scheme.empty() && !parts.is_scheme_wildcard) ||
222 (parts.host.empty() && !parts.has_domain_wildcard) ||
223 (parts.port.empty() && !parts.is_port_wildcard)) {
224 return false;
227 if (parts.host.find("*") != std::string::npos)
228 return false;
230 // Test if the scheme is supported or a wildcard.
231 if (!parts.is_scheme_wildcard &&
232 parts.scheme != std::string(chrome::kHttpScheme) &&
233 parts.scheme != std::string(chrome::kHttpsScheme)) {
234 return false;
236 return true;
239 // static
240 bool ContentSettingsPattern::Builder::LegacyValidate(
241 const PatternParts& parts) {
242 // If the pattern is for a "file-pattern" test if it is valid.
243 if (parts.scheme == std::string(chrome::kFileScheme) &&
244 !parts.is_scheme_wildcard &&
245 parts.host.empty() &&
246 parts.port.empty())
247 return true;
249 // If the pattern is for an extension URL test if it is valid.
250 if (parts.scheme == std::string(extensions::kExtensionScheme) &&
251 !parts.is_scheme_wildcard &&
252 !parts.host.empty() &&
253 !parts.has_domain_wildcard &&
254 parts.port.empty() &&
255 !parts.is_port_wildcard)
256 return true;
258 // Non-file patterns are invalid if either the scheme, host or port part is
259 // empty.
260 if ((!parts.is_scheme_wildcard) ||
261 (parts.host.empty() && !parts.has_domain_wildcard) ||
262 (!parts.is_port_wildcard))
263 return false;
265 // Test if the scheme is supported or a wildcard.
266 if (!parts.is_scheme_wildcard &&
267 parts.scheme != std::string(chrome::kHttpScheme) &&
268 parts.scheme != std::string(chrome::kHttpsScheme)) {
269 return false;
271 return true;
274 // ////////////////////////////////////////////////////////////////////////////
275 // ContentSettingsPattern::PatternParts
277 ContentSettingsPattern::PatternParts::PatternParts()
278 : is_scheme_wildcard(false),
279 has_domain_wildcard(false),
280 is_port_wildcard(false),
281 is_path_wildcard(false) {}
283 ContentSettingsPattern::PatternParts::~PatternParts() {}
285 // ////////////////////////////////////////////////////////////////////////////
286 // ContentSettingsPattern
289 // The version of the pattern format implemented. Version 1 includes the
290 // following patterns:
291 // - [*.]domain.tld (matches domain.tld and all sub-domains)
292 // - host (matches an exact hostname)
293 // - a.b.c.d (matches an exact IPv4 ip)
294 // - [a:b:c:d:e:f:g:h] (matches an exact IPv6 ip)
295 // - file:///tmp/test.html (a complete URL without a host)
296 // Version 2 adds a resource identifier for plugins.
297 // TODO(jochen): update once this feature is no longer behind a flag.
298 const int ContentSettingsPattern::kContentSettingsPatternVersion = 1;
300 // TODO(markusheintz): These two constants were moved to the Pattern Parser.
301 // Remove once the dependency of the ContentSettingsBaseProvider is removed.
302 const char* ContentSettingsPattern::kDomainWildcard = "[*.]";
303 const size_t ContentSettingsPattern::kDomainWildcardLength = 4;
305 // static
306 BuilderInterface* ContentSettingsPattern::CreateBuilder(
307 bool validate) {
308 return new Builder(validate);
311 // static
312 ContentSettingsPattern ContentSettingsPattern::FromURL(
313 const GURL& url) {
314 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
315 ContentSettingsPattern::CreateBuilder(false));
317 const GURL* local_url = &url;
318 if (url.SchemeIsFileSystem() && url.inner_url()) {
319 local_url = url.inner_url();
321 if (local_url->SchemeIsFile()) {
322 builder->WithScheme(local_url->scheme())->WithPath(local_url->path());
323 } else {
324 // Please keep the order of the ifs below as URLs with an IP as host can
325 // also have a "http" scheme.
326 if (local_url->HostIsIPAddress()) {
327 builder->WithScheme(local_url->scheme())->WithHost(local_url->host());
328 } else if (local_url->SchemeIs(chrome::kHttpScheme)) {
329 builder->WithSchemeWildcard()->WithDomainWildcard()->WithHost(
330 local_url->host());
331 } else if (local_url->SchemeIs(chrome::kHttpsScheme)) {
332 builder->WithScheme(local_url->scheme())->WithDomainWildcard()->WithHost(
333 local_url->host());
334 } else {
335 // Unsupported scheme
337 if (local_url->port().empty()) {
338 if (local_url->SchemeIs(chrome::kHttpsScheme))
339 builder->WithPort(GetDefaultPort(chrome::kHttpsScheme));
340 else
341 builder->WithPortWildcard();
342 } else {
343 builder->WithPort(local_url->port());
346 return builder->Build();
349 // static
350 ContentSettingsPattern ContentSettingsPattern::FromURLNoWildcard(
351 const GURL& url) {
352 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
353 ContentSettingsPattern::CreateBuilder(false));
355 const GURL* local_url = &url;
356 if (url.SchemeIsFileSystem() && url.inner_url()) {
357 local_url = url.inner_url();
359 if (local_url->SchemeIsFile()) {
360 builder->WithScheme(local_url->scheme())->WithPath(local_url->path());
361 } else {
362 builder->WithScheme(local_url->scheme())->WithHost(local_url->host());
363 if (local_url->port().empty()) {
364 builder->WithPort(GetDefaultPort(local_url->scheme()));
365 } else {
366 builder->WithPort(local_url->port());
369 return builder->Build();
372 // static
373 ContentSettingsPattern ContentSettingsPattern::FromString(
374 const std::string& pattern_spec) {
375 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
376 ContentSettingsPattern::CreateBuilder(false));
377 content_settings::PatternParser::Parse(pattern_spec, builder.get());
378 return builder->Build();
381 // static
382 ContentSettingsPattern ContentSettingsPattern::LegacyFromString(
383 const std::string& pattern_spec) {
384 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
385 ContentSettingsPattern::CreateBuilder(true));
386 content_settings::PatternParser::Parse(pattern_spec, builder.get());
387 return builder->Build();
390 // static
391 ContentSettingsPattern ContentSettingsPattern::Wildcard() {
392 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
393 ContentSettingsPattern::CreateBuilder(true));
394 builder->WithSchemeWildcard()->WithDomainWildcard()->WithPortWildcard()->
395 WithPathWildcard();
396 return builder->Build();
399 ContentSettingsPattern::ContentSettingsPattern()
400 : is_valid_(false) {
403 ContentSettingsPattern::ContentSettingsPattern(
404 const PatternParts& parts,
405 bool valid)
406 : parts_(parts),
407 is_valid_(valid) {
410 void ContentSettingsPattern::WriteToMessage(IPC::Message* m) const {
411 IPC::WriteParam(m, is_valid_);
412 IPC::WriteParam(m, parts_);
415 bool ContentSettingsPattern::ReadFromMessage(const IPC::Message* m,
416 PickleIterator* iter) {
417 return IPC::ReadParam(m, iter, &is_valid_) &&
418 IPC::ReadParam(m, iter, &parts_);
421 bool ContentSettingsPattern::Matches(
422 const GURL& url) const {
423 // An invalid pattern matches nothing.
424 if (!is_valid_)
425 return false;
427 const GURL* local_url = &url;
428 if (url.SchemeIsFileSystem() && url.inner_url()) {
429 local_url = url.inner_url();
432 // Match the scheme part.
433 const std::string scheme(local_url->scheme());
434 if (!parts_.is_scheme_wildcard &&
435 parts_.scheme != scheme) {
436 return false;
439 // File URLs have no host. Matches if the pattern has the path wildcard set,
440 // or if the path in the URL is identical to the one in the pattern.
441 // For filesystem:file URLs, the path used is the filesystem type, so all
442 // filesystem:file:///temporary/... are equivalent.
443 // TODO(markusheintz): Content settings should be defined for all files on
444 // a machine. Unless there is a good use case for supporting paths for file
445 // patterns, stop supporting path for file patterns.
446 if (!parts_.is_scheme_wildcard && scheme == chrome::kFileScheme)
447 return parts_.is_path_wildcard ||
448 parts_.path == std::string(local_url->path());
450 // Match the host part.
451 const std::string host(net::TrimEndingDot(local_url->host()));
452 if (!parts_.has_domain_wildcard) {
453 if (parts_.host != host)
454 return false;
455 } else {
456 if (!IsSubDomainOrEqual(host, parts_.host))
457 return false;
460 // For chrome extensions URLs ignore the port.
461 if (parts_.scheme == std::string(extensions::kExtensionScheme))
462 return true;
464 // Match the port part.
465 std::string port(local_url->port());
467 // Use the default port if the port string is empty. GURL returns an empty
468 // string if no port at all was specified or if the default port was
469 // specified.
470 if (port.empty()) {
471 port = GetDefaultPort(scheme);
474 if (!parts_.is_port_wildcard &&
475 parts_.port != port ) {
476 return false;
479 return true;
482 bool ContentSettingsPattern::MatchesAllHosts() const {
483 return parts_.has_domain_wildcard && parts_.host.empty();
486 const std::string ContentSettingsPattern::ToString() const {
487 if (IsValid())
488 return content_settings::PatternParser::ToString(parts_);
489 else
490 return "";
493 ContentSettingsPattern::Relation ContentSettingsPattern::Compare(
494 const ContentSettingsPattern& other) const {
495 // Two invalid patterns are identical in the way they behave. They don't match
496 // anything and are represented as an empty string. So it's fair to treat them
497 // as identical.
498 if ((this == &other) ||
499 (!is_valid_ && !other.is_valid_))
500 return IDENTITY;
502 if (!is_valid_ && other.is_valid_)
503 return DISJOINT_ORDER_POST;
504 if (is_valid_ && !other.is_valid_)
505 return DISJOINT_ORDER_PRE;
507 // If either host, port or scheme are disjoint return immediately.
508 Relation host_relation = CompareHost(parts_, other.parts_);
509 if (host_relation == DISJOINT_ORDER_PRE ||
510 host_relation == DISJOINT_ORDER_POST)
511 return host_relation;
513 Relation port_relation = ComparePort(parts_, other.parts_);
514 if (port_relation == DISJOINT_ORDER_PRE ||
515 port_relation == DISJOINT_ORDER_POST)
516 return port_relation;
518 Relation scheme_relation = CompareScheme(parts_, other.parts_);
519 if (scheme_relation == DISJOINT_ORDER_PRE ||
520 scheme_relation == DISJOINT_ORDER_POST)
521 return scheme_relation;
523 if (host_relation != IDENTITY)
524 return host_relation;
525 if (port_relation != IDENTITY)
526 return port_relation;
527 return scheme_relation;
530 bool ContentSettingsPattern::operator==(
531 const ContentSettingsPattern& other) const {
532 return Compare(other) == IDENTITY;
535 bool ContentSettingsPattern::operator!=(
536 const ContentSettingsPattern& other) const {
537 return !(*this == other);
540 bool ContentSettingsPattern::operator<(
541 const ContentSettingsPattern& other) const {
542 return Compare(other) < 0;
545 bool ContentSettingsPattern::operator>(
546 const ContentSettingsPattern& other) const {
547 return Compare(other) > 0;
550 // static
551 ContentSettingsPattern::Relation ContentSettingsPattern::CompareHost(
552 const ContentSettingsPattern::PatternParts& parts,
553 const ContentSettingsPattern::PatternParts& other_parts) {
554 if (!parts.has_domain_wildcard && !other_parts.has_domain_wildcard) {
555 // Case 1: No host starts with a wild card
556 int result = CompareDomainNames(parts.host, other_parts.host);
557 if (result == 0)
558 return ContentSettingsPattern::IDENTITY;
559 if (result < 0)
560 return ContentSettingsPattern::DISJOINT_ORDER_PRE;
561 return ContentSettingsPattern::DISJOINT_ORDER_POST;
562 } else if (parts.has_domain_wildcard && !other_parts.has_domain_wildcard) {
563 // Case 2: |host| starts with a domain wildcard and |other_host| does not
564 // start with a domain wildcard.
565 // Examples:
566 // "this" host: [*.]google.com
567 // "other" host: google.com
569 // [*.]google.com
570 // mail.google.com
572 // [*.]mail.google.com
573 // google.com
575 // [*.]youtube.com
576 // google.de
578 // [*.]youtube.com
579 // mail.google.com
581 // *
582 // google.de
583 if (IsSubDomainOrEqual(other_parts.host, parts.host)) {
584 return ContentSettingsPattern::SUCCESSOR;
585 } else {
586 if (CompareDomainNames(parts.host, other_parts.host) < 0)
587 return ContentSettingsPattern::DISJOINT_ORDER_PRE;
588 return ContentSettingsPattern::DISJOINT_ORDER_POST;
590 } else if (!parts.has_domain_wildcard && other_parts.has_domain_wildcard) {
591 // Case 3: |host| starts NOT with a domain wildcard and |other_host| starts
592 // with a domain wildcard.
593 if (IsSubDomainOrEqual(parts.host, other_parts.host)) {
594 return ContentSettingsPattern::PREDECESSOR;
595 } else {
596 if (CompareDomainNames(parts.host, other_parts.host) < 0)
597 return ContentSettingsPattern::DISJOINT_ORDER_PRE;
598 return ContentSettingsPattern::DISJOINT_ORDER_POST;
600 } else if (parts.has_domain_wildcard && other_parts.has_domain_wildcard) {
601 // Case 4: |host| and |other_host| both start with a domain wildcard.
602 // Examples:
603 // [*.]google.com
604 // [*.]google.com
606 // [*.]google.com
607 // [*.]mail.google.com
609 // [*.]youtube.com
610 // [*.]google.de
612 // [*.]youtube.com
613 // [*.]mail.google.com
615 // [*.]youtube.com
616 // *
618 // *
619 // [*.]youtube.com
620 if (parts.host == other_parts.host) {
621 return ContentSettingsPattern::IDENTITY;
622 } else if (IsSubDomainOrEqual(other_parts.host, parts.host)) {
623 return ContentSettingsPattern::SUCCESSOR;
624 } else if (IsSubDomainOrEqual(parts.host, other_parts.host)) {
625 return ContentSettingsPattern::PREDECESSOR;
626 } else {
627 if (CompareDomainNames(parts.host, other_parts.host) < 0)
628 return ContentSettingsPattern::DISJOINT_ORDER_PRE;
629 return ContentSettingsPattern::DISJOINT_ORDER_POST;
633 NOTREACHED();
634 return ContentSettingsPattern::IDENTITY;
637 // static
638 ContentSettingsPattern::Relation ContentSettingsPattern::CompareScheme(
639 const ContentSettingsPattern::PatternParts& parts,
640 const ContentSettingsPattern::PatternParts& other_parts) {
641 if (parts.is_scheme_wildcard && !other_parts.is_scheme_wildcard)
642 return ContentSettingsPattern::SUCCESSOR;
643 if (!parts.is_scheme_wildcard && other_parts.is_scheme_wildcard)
644 return ContentSettingsPattern::PREDECESSOR;
646 int result = parts.scheme.compare(other_parts.scheme);
647 if (result == 0)
648 return ContentSettingsPattern::IDENTITY;
649 if (result > 0)
650 return ContentSettingsPattern::DISJOINT_ORDER_PRE;
651 return ContentSettingsPattern::DISJOINT_ORDER_POST;
654 // static
655 ContentSettingsPattern::Relation ContentSettingsPattern::ComparePort(
656 const ContentSettingsPattern::PatternParts& parts,
657 const ContentSettingsPattern::PatternParts& other_parts) {
658 if (parts.is_port_wildcard && !other_parts.is_port_wildcard)
659 return ContentSettingsPattern::SUCCESSOR;
660 if (!parts.is_port_wildcard && other_parts.is_port_wildcard)
661 return ContentSettingsPattern::PREDECESSOR;
663 int result = parts.port.compare(other_parts.port);
664 if (result == 0)
665 return ContentSettingsPattern::IDENTITY;
666 if (result > 0)
667 return ContentSettingsPattern::DISJOINT_ORDER_PRE;
668 return ContentSettingsPattern::DISJOINT_ORDER_POST;