Added documentation to web_view.js/web_view_experimental.js regarding the webview...
[chromium-blink-merge.git] / extensions / common / url_pattern.cc
blobab33bd876b89daf9c862838b4640c86d878723d2
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "extensions/common/url_pattern.h"
7 #include "base/strings/string_number_conversions.h"
8 #include "base/strings/string_piece.h"
9 #include "base/strings/string_split.h"
10 #include "base/strings/string_util.h"
11 #include "content/public/common/url_constants.h"
12 #include "extensions/common/constants.h"
13 #include "url/gurl.h"
14 #include "url/url_util.h"
16 const char URLPattern::kAllUrlsPattern[] = "<all_urls>";
18 namespace {
20 // TODO(aa): What about more obscure schemes like data: and javascript: ?
21 // Note: keep this array in sync with kValidSchemeMasks.
22 const char* kValidSchemes[] = {
23 content::kHttpScheme,
24 content::kHttpsScheme,
25 content::kFileScheme,
26 content::kFtpScheme,
27 chrome::kChromeUIScheme,
28 extensions::kExtensionScheme,
29 content::kFileSystemScheme,
32 const int kValidSchemeMasks[] = {
33 URLPattern::SCHEME_HTTP,
34 URLPattern::SCHEME_HTTPS,
35 URLPattern::SCHEME_FILE,
36 URLPattern::SCHEME_FTP,
37 URLPattern::SCHEME_CHROMEUI,
38 URLPattern::SCHEME_EXTENSION,
39 URLPattern::SCHEME_FILESYSTEM,
42 COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks),
43 must_keep_these_arrays_in_sync);
// Message text for each parse outcome; collected into kParseResultMessages.
const char kParseSuccess[] = "Success.";
const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator.";
const char kParseErrorInvalidScheme[] = "Invalid scheme.";
const char kParseErrorWrongSchemeType[] = "Wrong scheme type.";
const char kParseErrorEmptyHost[] = "Host can not be empty.";
const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard.";
const char kParseErrorEmptyPath[] = "Empty path.";
const char kParseErrorInvalidPort[] = "Invalid port.";
54 // Message explaining each URLPattern::ParseResult.
55 const char* const kParseResultMessages[] = {
56 kParseSuccess,
57 kParseErrorMissingSchemeSeparator,
58 kParseErrorInvalidScheme,
59 kParseErrorWrongSchemeType,
60 kParseErrorEmptyHost,
61 kParseErrorInvalidHostWildcard,
62 kParseErrorEmptyPath,
63 kParseErrorInvalidPort,
66 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
67 must_add_message_for_each_parse_result);
// Marks the end of the host and the start of the path inside a pattern.
const char kPathSeparator[] = "/";
71 bool IsStandardScheme(const std::string& scheme) {
72 // "*" gets the same treatment as a standard scheme.
73 if (scheme == "*")
74 return true;
76 return url_util::IsStandard(scheme.c_str(),
77 url_parse::Component(0, static_cast<int>(scheme.length())));
80 bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
81 if (port == "*")
82 return true;
84 // Only accept non-wildcard ports if the scheme uses ports.
85 if (url_canon::DefaultPortForScheme(scheme.c_str(), scheme.length()) ==
86 url_parse::PORT_UNSPECIFIED) {
87 return false;
90 int parsed_port = url_parse::PORT_UNSPECIFIED;
91 if (!base::StringToInt(port, &parsed_port))
92 return false;
93 return (parsed_port >= 0) && (parsed_port < 65536);
96 // Returns |path| with the trailing wildcard stripped if one existed.
98 // The functions that rely on this (OverlapsWith and Contains) are only
99 // called for the patterns inside URLPatternSet. In those cases, we know that
100 // the path will have only a single wildcard at the end. This makes figuring
101 // out overlap much easier. It seems like there is probably a computer-sciency
102 // way to solve the general case, but we don't need that yet.
103 std::string StripTrailingWildcard(const std::string& path) {
104 size_t wildcard_index = path.find('*');
105 size_t path_last = path.size() - 1;
106 DCHECK(wildcard_index == std::string::npos || wildcard_index == path_last);
107 return wildcard_index == path_last ? path.substr(0, path_last) : path;
110 } // namespace
112 URLPattern::URLPattern()
113 : valid_schemes_(SCHEME_NONE),
114 match_all_urls_(false),
115 match_subdomains_(false),
116 port_("*") {}
118 URLPattern::URLPattern(int valid_schemes)
119 : valid_schemes_(valid_schemes),
120 match_all_urls_(false),
121 match_subdomains_(false),
122 port_("*") {}
124 URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
125 // Strict error checking is used, because this constructor is only
126 // appropriate when we know |pattern| is valid.
127 : valid_schemes_(valid_schemes),
128 match_all_urls_(false),
129 match_subdomains_(false),
130 port_("*") {
131 ParseResult result = Parse(pattern);
132 if (PARSE_SUCCESS != result)
133 NOTREACHED() << "URLPattern invalid: " << pattern << " result " << result;
136 URLPattern::~URLPattern() {
139 bool URLPattern::operator<(const URLPattern& other) const {
140 return GetAsString() < other.GetAsString();
143 bool URLPattern::operator>(const URLPattern& other) const {
144 return GetAsString() > other.GetAsString();
147 bool URLPattern::operator==(const URLPattern& other) const {
148 return GetAsString() == other.GetAsString();
151 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
152 spec_.clear();
153 SetMatchAllURLs(false);
154 SetMatchSubdomains(false);
155 SetPort("*");
157 // Special case pattern to match every valid URL.
158 if (pattern == kAllUrlsPattern) {
159 SetMatchAllURLs(true);
160 return PARSE_SUCCESS;
163 // Parse out the scheme.
164 size_t scheme_end_pos = pattern.find(content::kStandardSchemeSeparator);
165 bool has_standard_scheme_separator = true;
167 // Some urls also use ':' alone as the scheme separator.
168 if (scheme_end_pos == std::string::npos) {
169 scheme_end_pos = pattern.find(':');
170 has_standard_scheme_separator = false;
173 if (scheme_end_pos == std::string::npos)
174 return PARSE_ERROR_MISSING_SCHEME_SEPARATOR;
176 if (!SetScheme(pattern.substr(0, scheme_end_pos)))
177 return PARSE_ERROR_INVALID_SCHEME;
179 bool standard_scheme = IsStandardScheme(scheme_);
180 if (standard_scheme != has_standard_scheme_separator)
181 return PARSE_ERROR_WRONG_SCHEME_SEPARATOR;
183 // Advance past the scheme separator.
184 scheme_end_pos +=
185 (standard_scheme ? strlen(content::kStandardSchemeSeparator) : 1);
186 if (scheme_end_pos >= pattern.size())
187 return PARSE_ERROR_EMPTY_HOST;
189 // Parse out the host and path.
190 size_t host_start_pos = scheme_end_pos;
191 size_t path_start_pos = 0;
193 if (!standard_scheme) {
194 path_start_pos = host_start_pos;
195 } else if (scheme_ == content::kFileScheme) {
196 size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
197 if (host_end_pos == std::string::npos) {
198 // Allow hostname omission.
199 // e.g. file://* is interpreted as file:///*,
200 // file://foo* is interpreted as file:///foo*.
201 path_start_pos = host_start_pos - 1;
202 } else {
203 // Ignore hostname if scheme is file://.
204 // e.g. file://localhost/foo is equal to file:///foo.
205 path_start_pos = host_end_pos;
207 } else {
208 size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
210 // Host is required.
211 if (host_start_pos == host_end_pos)
212 return PARSE_ERROR_EMPTY_HOST;
214 if (host_end_pos == std::string::npos)
215 return PARSE_ERROR_EMPTY_PATH;
217 host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos);
219 // The first component can optionally be '*' to match all subdomains.
220 std::vector<std::string> host_components;
221 base::SplitString(host_, '.', &host_components);
222 if (host_components[0] == "*") {
223 match_subdomains_ = true;
224 host_components.erase(host_components.begin(),
225 host_components.begin() + 1);
227 host_ = JoinString(host_components, '.');
229 path_start_pos = host_end_pos;
232 SetPath(pattern.substr(path_start_pos));
234 size_t port_pos = host_.find(':');
235 if (port_pos != std::string::npos) {
236 if (!SetPort(host_.substr(port_pos + 1)))
237 return PARSE_ERROR_INVALID_PORT;
238 host_ = host_.substr(0, port_pos);
241 // No other '*' can occur in the host, though. This isn't necessary, but is
242 // done as a convenience to developers who might otherwise be confused and
243 // think '*' works as a glob in the host.
244 if (host_.find('*') != std::string::npos)
245 return PARSE_ERROR_INVALID_HOST_WILDCARD;
247 return PARSE_SUCCESS;
250 void URLPattern::SetValidSchemes(int valid_schemes) {
251 spec_.clear();
252 valid_schemes_ = valid_schemes;
255 void URLPattern::SetHost(const std::string& host) {
256 spec_.clear();
257 host_ = host;
260 void URLPattern::SetMatchAllURLs(bool val) {
261 spec_.clear();
262 match_all_urls_ = val;
264 if (val) {
265 match_subdomains_ = true;
266 scheme_ = "*";
267 host_.clear();
268 SetPath("/*");
272 void URLPattern::SetMatchSubdomains(bool val) {
273 spec_.clear();
274 match_subdomains_ = val;
277 bool URLPattern::SetScheme(const std::string& scheme) {
278 spec_.clear();
279 scheme_ = scheme;
280 if (scheme_ == "*") {
281 valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
282 } else if (!IsValidScheme(scheme_)) {
283 return false;
285 return true;
288 bool URLPattern::IsValidScheme(const std::string& scheme) const {
289 if (valid_schemes_ == SCHEME_ALL)
290 return true;
292 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
293 if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i]))
294 return true;
297 return false;
300 void URLPattern::SetPath(const std::string& path) {
301 spec_.clear();
302 path_ = path;
303 path_escaped_ = path_;
304 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");
305 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");
308 bool URLPattern::SetPort(const std::string& port) {
309 spec_.clear();
310 if (IsValidPortForScheme(scheme_, port)) {
311 port_ = port;
312 return true;
314 return false;
317 bool URLPattern::MatchesURL(const GURL& test) const {
318 const GURL* test_url = &test;
319 bool has_inner_url = test.inner_url() != NULL;
321 if (has_inner_url) {
322 if (!test.SchemeIsFileSystem())
323 return false; // The only nested URLs we handle are filesystem URLs.
324 test_url = test.inner_url();
327 if (!MatchesScheme(test_url->scheme()))
328 return false;
330 if (match_all_urls_)
331 return true;
333 std::string path_for_request = test.PathForRequest();
334 if (has_inner_url)
335 path_for_request = test_url->path() + path_for_request;
337 return MatchesSecurityOriginHelper(*test_url) &&
338 MatchesPath(path_for_request);
341 bool URLPattern::MatchesSecurityOrigin(const GURL& test) const {
342 const GURL* test_url = &test;
343 bool has_inner_url = test.inner_url() != NULL;
345 if (has_inner_url) {
346 if (!test.SchemeIsFileSystem())
347 return false; // The only nested URLs we handle are filesystem URLs.
348 test_url = test.inner_url();
351 if (!MatchesScheme(test_url->scheme()))
352 return false;
354 if (match_all_urls_)
355 return true;
357 return MatchesSecurityOriginHelper(*test_url);
360 bool URLPattern::MatchesScheme(const std::string& test) const {
361 if (!IsValidScheme(test))
362 return false;
364 return scheme_ == "*" || test == scheme_;
367 bool URLPattern::MatchesHost(const std::string& host) const {
368 std::string test(content::kHttpScheme);
369 test += content::kStandardSchemeSeparator;
370 test += host;
371 test += "/";
372 return MatchesHost(GURL(test));
375 bool URLPattern::MatchesHost(const GURL& test) const {
376 // If the hosts are exactly equal, we have a match.
377 if (test.host() == host_)
378 return true;
380 // If we're matching subdomains, and we have no host in the match pattern,
381 // that means that we're matching all hosts, which means we have a match no
382 // matter what the test host is.
383 if (match_subdomains_ && host_.empty())
384 return true;
386 // Otherwise, we can only match if our match pattern matches subdomains.
387 if (!match_subdomains_)
388 return false;
390 // We don't do subdomain matching against IP addresses, so we can give up now
391 // if the test host is an IP address.
392 if (test.HostIsIPAddress())
393 return false;
395 // Check if the test host is a subdomain of our host.
396 if (test.host().length() <= (host_.length() + 1))
397 return false;
399 if (test.host().compare(test.host().length() - host_.length(),
400 host_.length(), host_) != 0)
401 return false;
403 return test.host()[test.host().length() - host_.length() - 1] == '.';
406 bool URLPattern::MatchesPath(const std::string& test) const {
407 // Make the behaviour of OverlapsWith consistent with MatchesURL, which is
408 // need to match hosted apps on e.g. 'google.com' also run on 'google.com/'.
409 if (test + "/*" == path_escaped_)
410 return true;
412 return MatchPattern(test, path_escaped_);
415 const std::string& URLPattern::GetAsString() const {
416 if (!spec_.empty())
417 return spec_;
419 if (match_all_urls_) {
420 spec_ = kAllUrlsPattern;
421 return spec_;
424 bool standard_scheme = IsStandardScheme(scheme_);
426 std::string spec = scheme_ +
427 (standard_scheme ? content::kStandardSchemeSeparator : ":");
429 if (scheme_ != content::kFileScheme && standard_scheme) {
430 if (match_subdomains_) {
431 spec += "*";
432 if (!host_.empty())
433 spec += ".";
436 if (!host_.empty())
437 spec += host_;
439 if (port_ != "*") {
440 spec += ":";
441 spec += port_;
445 if (!path_.empty())
446 spec += path_;
448 spec_ = spec;
449 return spec_;
452 bool URLPattern::OverlapsWith(const URLPattern& other) const {
453 if (match_all_urls() || other.match_all_urls())
454 return true;
455 return (MatchesAnyScheme(other.GetExplicitSchemes()) ||
456 other.MatchesAnyScheme(GetExplicitSchemes()))
457 && (MatchesHost(other.host()) || other.MatchesHost(host()))
458 && (MatchesPortPattern(other.port()) || other.MatchesPortPattern(port()))
459 && (MatchesPath(StripTrailingWildcard(other.path())) ||
460 other.MatchesPath(StripTrailingWildcard(path())));
463 bool URLPattern::Contains(const URLPattern& other) const {
464 if (match_all_urls())
465 return true;
466 return MatchesAllSchemes(other.GetExplicitSchemes())
467 && MatchesHost(other.host())
468 && MatchesPortPattern(other.port())
469 && MatchesPath(StripTrailingWildcard(other.path()));
472 bool URLPattern::MatchesAnyScheme(
473 const std::vector<std::string>& schemes) const {
474 for (std::vector<std::string>::const_iterator i = schemes.begin();
475 i != schemes.end(); ++i) {
476 if (MatchesScheme(*i))
477 return true;
480 return false;
483 bool URLPattern::MatchesAllSchemes(
484 const std::vector<std::string>& schemes) const {
485 for (std::vector<std::string>::const_iterator i = schemes.begin();
486 i != schemes.end(); ++i) {
487 if (!MatchesScheme(*i))
488 return false;
491 return true;
494 bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const {
495 // Ignore hostname if scheme is file://.
496 if (scheme_ != content::kFileScheme && !MatchesHost(test))
497 return false;
499 if (!MatchesPortPattern(base::IntToString(test.EffectiveIntPort())))
500 return false;
502 return true;
505 bool URLPattern::MatchesPortPattern(const std::string& port) const {
506 return port_ == "*" || port_ == port;
509 std::vector<std::string> URLPattern::GetExplicitSchemes() const {
510 std::vector<std::string> result;
512 if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) {
513 result.push_back(scheme_);
514 return result;
517 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
518 if (MatchesScheme(kValidSchemes[i])) {
519 result.push_back(kValidSchemes[i]);
523 return result;
526 std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const {
527 std::vector<std::string> explicit_schemes = GetExplicitSchemes();
528 std::vector<URLPattern> result;
530 for (std::vector<std::string>::const_iterator i = explicit_schemes.begin();
531 i != explicit_schemes.end(); ++i) {
532 URLPattern temp = *this;
533 temp.SetScheme(*i);
534 temp.SetMatchAllURLs(false);
535 result.push_back(temp);
538 return result;
541 // static
542 const char* URLPattern::GetParseResultString(
543 URLPattern::ParseResult parse_result) {
544 return kParseResultMessages[parse_result];