1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "extensions/common/url_pattern.h"
7 #include "base/strings/string_number_conversions.h"
8 #include "base/strings/string_piece.h"
9 #include "base/strings/string_split.h"
10 #include "base/strings/string_util.h"
11 #include "content/public/common/url_constants.h"
12 #include "extensions/common/constants.h"
14 #include "url/url_util.h"
// Special pattern literal meaning "match every URL". Parse() compares its
// input against this constant and switches the pattern to match-all mode on
// equality; GetAsString() emits it again for match-all patterns.
16 const char URLPattern::kAllUrlsPattern
[] = "<all_urls>";
// Scheme names known to URLPattern. Entry i is paired with
// kValidSchemeMasks[i]: IsValidScheme() accepts kValidSchemes[i] only when the
// corresponding mask bit is set in valid_schemes_.
// NOTE(review): fewer entries are visible here than in kValidSchemeMasks,
// although the COMPILE_ASSERT requires equal lengths; the list (and its
// closing brace) appears incomplete as shown -- confirm before editing.
20 // TODO(aa): What about more obscure schemes like data: and javascript: ?
21 // Note: keep this array in sync with kValidSchemeMasks.
22 const char* kValidSchemes
[] = {
24 content::kHttpsScheme
,
27 chrome::kChromeUIScheme
,
28 extensions::kExtensionScheme
,
29 content::kFileSystemScheme
,
// Scheme bit masks, parallel to kValidSchemes: index i here is the
// URLPattern::SCHEME_* bit that must be present in valid_schemes_ for
// kValidSchemes[i] to be accepted by IsValidScheme().
32 const int kValidSchemeMasks
[] = {
33 URLPattern::SCHEME_HTTP
,
34 URLPattern::SCHEME_HTTPS
,
35 URLPattern::SCHEME_FILE
,
36 URLPattern::SCHEME_FTP
,
37 URLPattern::SCHEME_CHROMEUI
,
38 URLPattern::SCHEME_EXTENSION
,
39 URLPattern::SCHEME_FILESYSTEM
,
// Compile-time guard: kValidSchemes and kValidSchemeMasks are indexed in
// lockstep, so they must contain the same number of entries.
42 COMPILE_ASSERT(arraysize(kValidSchemes
) == arraysize(kValidSchemeMasks
),
43 must_keep_these_arrays_in_sync
);
// Human-readable descriptions of parse outcomes. These constants are the
// values stored in the kParseResultMessages lookup table.
45 const char kParseSuccess
[] = "Success.";
46 const char kParseErrorMissingSchemeSeparator
[] = "Missing scheme separator.";
47 const char kParseErrorInvalidScheme
[] = "Invalid scheme.";
48 const char kParseErrorWrongSchemeType
[] = "Wrong scheme type.";
49 const char kParseErrorEmptyHost
[] = "Host can not be empty.";
50 const char kParseErrorInvalidHostWildcard
[] = "Invalid host wildcard.";
51 const char kParseErrorEmptyPath
[] = "Empty path.";
52 const char kParseErrorInvalidPort
[] = "Invalid port.";
// Lookup table indexed directly by a URLPattern::ParseResult value (see
// GetParseResultString()), so the entry order has to follow the enum order.
// NOTE(review): kParseSuccess, kParseErrorEmptyHost and kParseErrorEmptyPath
// are defined but do not appear among the entries shown here; the table looks
// incomplete as shown -- confirm the full list against the enum declaration.
54 // Message explaining each URLPattern::ParseResult.
55 const char* const kParseResultMessages
[] = {
57 kParseErrorMissingSchemeSeparator
,
58 kParseErrorInvalidScheme
,
59 kParseErrorWrongSchemeType
,
61 kParseErrorInvalidHostWildcard
,
63 kParseErrorInvalidPort
,
// Compile-time guard: kParseResultMessages must hold exactly
// URLPattern::NUM_PARSE_RESULTS entries, i.e. one message per ParseResult.
66 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS
== arraysize(kParseResultMessages
),
67 must_add_message_for_each_parse_result
);
// Separator Parse() searches for (starting at the host position) to find
// where the host ends and the path begins.
69 const char kPathSeparator
[] = "/";
// Reports whether |scheme| is a "standard" scheme by asking
// url_util::IsStandard() about the whole string (component [0, length)).
// Parse() uses the answer to decide which scheme separator a pattern must use,
// and GetAsString() uses it to decide which separator to emit.
// NOTE(review): the existing comment below describes special handling of "*",
// but the code implementing it is not among the lines shown here -- confirm.
71 bool IsStandardScheme(const std::string
& scheme
) {
72 // "*" gets the same treatment as a standard scheme.
76 return url_util::IsStandard(scheme
.c_str(),
77 url_parse::Component(0, static_cast<int>(scheme
.length())));
// Checks whether |port| is an acceptable port component for a pattern whose
// scheme is |scheme|.
//
// From the lines shown: the scheme's default port is looked up and compared
// with url_parse::PORT_UNSPECIFIED (a scheme without a default port does not
// "use ports"), |port| is converted with base::StringToInt(), and the final
// result requires the parsed value to lie in [0, 65535].
// NOTE(review): the statements executed on the two failure branches (and any
// wildcard handling implied by "non-wildcard" below) are not visible here;
// presumably they reject/accept the port respectively -- confirm.
80 bool IsValidPortForScheme(const std::string
& scheme
, const std::string
& port
) {
84 // Only accept non-wildcard ports if the scheme uses ports.
85 if (url_canon::DefaultPortForScheme(scheme
.c_str(), scheme
.length()) ==
86 url_parse::PORT_UNSPECIFIED
) {
// Pre-initialised so the variable has a defined value even if parsing fails.
90 int parsed_port
= url_parse::PORT_UNSPECIFIED
;
91 if (!base::StringToInt(port
, &parsed_port
))
93 return (parsed_port
>= 0) && (parsed_port
< 65536);
96 // Returns |path| with the trailing wildcard stripped if one existed.
98 // The functions that rely on this (OverlapsWith and Contains) are only
99 // called for the patterns inside URLPatternSet. In those cases, we know that
100 // the path will have only a single wildcard at the end. This makes figuring
101 // out overlap much easier. It seems like there is probably a computer-sciency
102 // way to solve the general case, but we don't need that yet.
//
// Only the first '*' is examined; the DCHECK enforces the precondition that a
// wildcard, if present, is the final character.
103 std::string
StripTrailingWildcard(const std::string
& path
) {
104 size_t wildcard_index
= path
.find('*');
// For an empty |path| this unsigned subtraction wraps around to the largest
// size_t value, which equals std::string::npos. find() also returned npos in
// that case, so the DCHECK holds and substr(0, npos) yields the empty string.
105 size_t path_last
= path
.size() - 1;
106 DCHECK(wildcard_index
== std::string::npos
|| wildcard_index
== path_last
);
107 return wildcard_index
== path_last
? path
.substr(0, path_last
) : path
;
// Default constructor: the pattern accepts no schemes (SCHEME_NONE) and starts
// with both match-all and subdomain matching switched off.
// NOTE(review): the initializer list ends on a trailing comma as shown, so at
// least one further initializer and the body are not visible here.
112 URLPattern::URLPattern()
113 : valid_schemes_(SCHEME_NONE
),
114 match_all_urls_(false),
115 match_subdomains_(false),
// Constructs an unparsed pattern restricted to the scheme bit mask
// |valid_schemes| (a combination of SCHEME_* values); match-all and subdomain
// matching start switched off.
// NOTE(review): the initializer list ends on a trailing comma as shown, so at
// least one further initializer and the body are not visible here.
118 URLPattern::URLPattern(int valid_schemes
)
119 : valid_schemes_(valid_schemes
),
120 match_all_urls_(false),
121 match_subdomains_(false),
// Constructs a pattern restricted to |valid_schemes| and immediately parses
// |pattern|. Any result other than PARSE_SUCCESS triggers NOTREACHED() with
// the offending pattern and the numeric result code, so this constructor is
// only for pattern strings already known to be valid; use the other
// constructors plus Parse() when the input may be malformed.
// NOTE(review): part of the initializer list is not visible here.
124 URLPattern::URLPattern(int valid_schemes
, const std::string
& pattern
)
125 // Strict error checking is used, because this constructor is only
126 // appropriate when we know |pattern| is valid.
127 : valid_schemes_(valid_schemes
),
128 match_all_urls_(false),
129 match_subdomains_(false),
131 ParseResult result
= Parse(pattern
);
132 if (PARSE_SUCCESS
!= result
)
133 NOTREACHED() << "URLPattern invalid: " << pattern
<< " result " << result
;
// Destructor. No cleanup statements are visible in the lines shown here.
136 URLPattern::~URLPattern() {
// Orders patterns by their string form: true when this pattern's
// GetAsString() compares less than |other|'s under std::string ordering.
139 bool URLPattern::operator<(const URLPattern
& other
) const {
140 return GetAsString() < other
.GetAsString();
// Mirror of operator<: true when this pattern's GetAsString() compares
// greater than |other|'s under std::string ordering.
143 bool URLPattern::operator>(const URLPattern
& other
) const {
144 return GetAsString() > other
.GetAsString();
// Two patterns are equal when their GetAsString() forms are identical; no
// other member is compared here.
147 bool URLPattern::operator==(const URLPattern
& other
) const {
148 return GetAsString() == other
.GetAsString();
// Parses |pattern| into this object's scheme, host, port and path and returns
// PARSE_SUCCESS, or the ParseResult describing the first problem found.
//
// Steps visible here:
//  1. Reset match-all and subdomain matching; the literal kAllUrlsPattern
//     short-circuits into match-all mode.
//  2. Find the scheme separator (content::kStandardSchemeSeparator, otherwise
//     a lone ':') and validate the scheme through SetScheme().
//  3. Require that a standard scheme was written with the standard separator
//     and a non-standard scheme with ':'.
//  4. Split the remainder into host and path. Non-standard schemes have no
//     host; file: patterns ignore any host; otherwise a leading "*" host
//     component turns on subdomain matching.
//  5. Split an optional ":port" suffix off the host and validate it via
//     SetPort(), then reject any '*' still left in the host.
//
// NOTE(review): several structural lines (closing braces, at least one else
// branch) are not visible here, so the exact nesting of steps 4 and 5 should
// be confirmed against the full file.
151 URLPattern::ParseResult
URLPattern::Parse(const std::string
& pattern
) {
153 SetMatchAllURLs(false);
154 SetMatchSubdomains(false);
157 // Special case pattern to match every valid URL.
158 if (pattern
== kAllUrlsPattern
) {
159 SetMatchAllURLs(true);
160 return PARSE_SUCCESS
;
163 // Parse out the scheme.
164 size_t scheme_end_pos
= pattern
.find(content::kStandardSchemeSeparator
);
165 bool has_standard_scheme_separator
= true;
167 // Some urls also use ':' alone as the scheme separator.
168 if (scheme_end_pos
== std::string::npos
) {
169 scheme_end_pos
= pattern
.find(':');
170 has_standard_scheme_separator
= false;
173 if (scheme_end_pos
== std::string::npos
)
174 return PARSE_ERROR_MISSING_SCHEME_SEPARATOR
;
176 if (!SetScheme(pattern
.substr(0, scheme_end_pos
)))
177 return PARSE_ERROR_INVALID_SCHEME
;
// The separator style found in the text must agree with the kind of scheme.
179 bool standard_scheme
= IsStandardScheme(scheme_
);
180 if (standard_scheme
!= has_standard_scheme_separator
)
181 return PARSE_ERROR_WRONG_SCHEME_SEPARATOR
;
183 // Advance past the scheme separator.
// NOTE(review): the left-hand side of the statement below is not visible here;
// presumably it adds the separator length to scheme_end_pos -- confirm.
185 (standard_scheme
? strlen(content::kStandardSchemeSeparator
) : 1);
// Nothing after the separator means there is no host (or path) at all.
186 if (scheme_end_pos
>= pattern
.size())
187 return PARSE_ERROR_EMPTY_HOST
;
189 // Parse out the host and path.
190 size_t host_start_pos
= scheme_end_pos
;
191 size_t path_start_pos
= 0;
193 if (!standard_scheme
) {
// Non-standard schemes have no host: everything after ':' is the path.
194 path_start_pos
= host_start_pos
;
195 } else if (scheme_
== content::kFileScheme
) {
196 size_t host_end_pos
= pattern
.find(kPathSeparator
, host_start_pos
);
197 if (host_end_pos
== std::string::npos
) {
198 // Allow hostname omission.
199 // e.g. file://* is interpreted as file:///*,
200 // file://foo* is interpreted as file:///foo*.
// Stepping back one character presumably reuses the last character of the
// scheme separator as the path's leading '/' -- confirm.
201 path_start_pos
= host_start_pos
- 1;
203 // Ignore hostname if scheme is file://.
204 // e.g. file://localhost/foo is equal to file:///foo.
205 path_start_pos
= host_end_pos
;
208 size_t host_end_pos
= pattern
.find(kPathSeparator
, host_start_pos
);
// A separator immediately at the host position means the host is empty.
211 if (host_start_pos
== host_end_pos
)
212 return PARSE_ERROR_EMPTY_HOST
;
// No separator at all means the pattern has a host but no path.
214 if (host_end_pos
== std::string::npos
)
215 return PARSE_ERROR_EMPTY_PATH
;
217 host_
= pattern
.substr(host_start_pos
, host_end_pos
- host_start_pos
);
219 // The first component can optionally be '*' to match all subdomains.
220 std::vector
<std::string
> host_components
;
221 base::SplitString(host_
, '.', &host_components
);
222 if (host_components
[0] == "*") {
223 match_subdomains_
= true;
// Drop the "*" component so host_ keeps only the literal part of the host.
224 host_components
.erase(host_components
.begin(),
225 host_components
.begin() + 1);
227 host_
= JoinString(host_components
, '.');
229 path_start_pos
= host_end_pos
;
232 SetPath(pattern
.substr(path_start_pos
));
// Anything after the first ':' in the host text is treated as the port.
234 size_t port_pos
= host_
.find(':');
235 if (port_pos
!= std::string::npos
) {
236 if (!SetPort(host_
.substr(port_pos
+ 1)))
237 return PARSE_ERROR_INVALID_PORT
;
238 host_
= host_
.substr(0, port_pos
);
241 // No other '*' can occur in the host, though. This isn't necessary, but is
242 // done as a convenience to developers who might otherwise be confused and
243 // think '*' works as a glob in the host.
244 if (host_
.find('*') != std::string::npos
)
245 return PARSE_ERROR_INVALID_HOST_WILDCARD
;
247 return PARSE_SUCCESS
;
// Replaces the bit mask of schemes (SCHEME_* values) this pattern may match.
// NOTE(review): a line between the signature and the assignment does not
// appear in the lines shown here -- confirm against the full file.
250 void URLPattern::SetValidSchemes(int valid_schemes
) {
252 valid_schemes_
= valid_schemes
;
// Setter for the pattern's host component.
// NOTE(review): the body is not visible in the lines shown here; presumably it
// stores |host| in host_ -- confirm against the full file.
255 void URLPattern::SetHost(const std::string
& host
) {
// Stores |val| as the match-all flag. The lines shown also set
// match_subdomains_ to true.
// NOTE(review): whether that second assignment is conditional on |val|, and
// what else is updated alongside it, cannot be determined from the lines
// shown here -- confirm against the full file.
260 void URLPattern::SetMatchAllURLs(bool val
) {
262 match_all_urls_
= val
;
265 match_subdomains_
= true;
// Stores |val| as the flag controlling whether MatchesHost() also accepts
// subdomains of host_.
// NOTE(review): a line between the signature and the assignment does not
// appear in the lines shown here -- confirm against the full file.
272 void URLPattern::SetMatchSubdomains(bool val
) {
274 match_subdomains_
= val
;
// Sets the pattern's scheme and reports success as a bool; Parse() maps a
// false return to PARSE_ERROR_INVALID_SCHEME.
//
// A wildcard scheme ("*") narrows valid_schemes_ to at most HTTP and HTTPS, so
// "*" never matches any other scheme. Any other scheme is checked with
// IsValidScheme().
// NOTE(review): the assignment to scheme_ and the return statements are not
// visible in the lines shown here -- confirm against the full file.
277 bool URLPattern::SetScheme(const std::string
& scheme
) {
280 if (scheme_
== "*") {
281 valid_schemes_
&= (SCHEME_HTTP
| SCHEME_HTTPS
);
282 } else if (!IsValidScheme(scheme_
)) {
// Reports whether |scheme| is permitted by this pattern's valid_schemes_ mask.
//
// SCHEME_ALL is special-cased up front. Otherwise |scheme| is looked up in
// kValidSchemes and accepted only if the bit at the same index in
// kValidSchemeMasks is set in valid_schemes_.
// NOTE(review): the return statements are not visible in the lines shown
// here; presumably SCHEME_ALL and a successful lookup both return true and
// everything else returns false -- confirm.
288 bool URLPattern::IsValidScheme(const std::string
& scheme
) const {
289 if (valid_schemes_
== SCHEME_ALL
)
292 for (size_t i
= 0; i
< arraysize(kValidSchemes
); ++i
) {
293 if (scheme
== kValidSchemes
[i
] && (valid_schemes_
& kValidSchemeMasks
[i
]))
// Sets the pattern's path and derives path_escaped_, the form MatchesPath()
// hands to MatchPattern().
//
// path_escaped_ is a copy of path_ in which every backslash is doubled and
// every '?' is prefixed with a backslash -- presumably so MatchPattern()
// treats both characters literally rather than as pattern syntax (confirm
// against MatchPattern's documentation). Backslashes are escaped first so the
// ones inserted for '?' are not doubled again.
// NOTE(review): the assignment of |path| to path_ is not visible in the lines
// shown here -- confirm against the full file.
300 void URLPattern::SetPath(const std::string
& path
) {
303 path_escaped_
= path_
;
304 ReplaceSubstringsAfterOffset(&path_escaped_
, 0, "\\", "\\\\");
305 ReplaceSubstringsAfterOffset(&path_escaped_
, 0, "?", "\\?");
// Sets the pattern's port after validating |port| against the current scheme
// with IsValidPortForScheme(). Returns a bool; Parse() maps a false return to
// PARSE_ERROR_INVALID_PORT.
// NOTE(review): the statements inside and after the branch (storing the port,
// the return values) are not visible in the lines shown here -- confirm.
308 bool URLPattern::SetPort(const std::string
& port
) {
310 if (IsValidPortForScheme(scheme_
, port
)) {
// Tests whether the URL |test| is matched by this pattern: its scheme must
// pass MatchesScheme(), its origin must pass MatchesSecurityOriginHelper() and
// its path must pass MatchesPath().
//
// Nested URLs: the only ones handled are filesystem: URLs. For those, the
// scheme and origin checks are applied to the inner URL, and the path that is
// matched is the inner URL's path followed by the outer URL's
// PathForRequest().
// NOTE(review): the conditions that test has_inner_url (and any match-all
// shortcut) are not visible in the lines shown here -- confirm.
317 bool URLPattern::MatchesURL(const GURL
& test
) const {
318 const GURL
* test_url
= &test
;
319 bool has_inner_url
= test
.inner_url() != NULL
;
322 if (!test
.SchemeIsFileSystem())
323 return false; // The only nested URLs we handle are filesystem URLs.
324 test_url
= test
.inner_url();
327 if (!MatchesScheme(test_url
->scheme()))
333 std::string path_for_request
= test
.PathForRequest();
335 path_for_request
= test_url
->path() + path_for_request
;
337 return MatchesSecurityOriginHelper(*test_url
) &&
338 MatchesPath(path_for_request
);
// Like MatchesURL() but ignores the path: |test| matches when its scheme
// passes MatchesScheme() and its origin passes MatchesSecurityOriginHelper().
//
// Nested URLs: the only ones handled are filesystem: URLs, for which both
// checks are applied to the inner URL.
// NOTE(review): the conditions that test has_inner_url (and any match-all
// shortcut) are not visible in the lines shown here -- confirm.
341 bool URLPattern::MatchesSecurityOrigin(const GURL
& test
) const {
342 const GURL
* test_url
= &test
;
343 bool has_inner_url
= test
.inner_url() != NULL
;
346 if (!test
.SchemeIsFileSystem())
347 return false; // The only nested URLs we handle are filesystem URLs.
348 test_url
= test
.inner_url();
351 if (!MatchesScheme(test_url
->scheme()))
357 return MatchesSecurityOriginHelper(*test_url
);
// Tests whether the scheme |test| is matched by this pattern. |test| is first
// checked with IsValidScheme(); past that check, a wildcard pattern scheme
// ("*") matches it, otherwise the schemes must be identical.
// NOTE(review): the statement taken when IsValidScheme() fails is not visible
// here; presumably it returns false -- confirm.
360 bool URLPattern::MatchesScheme(const std::string
& test
) const {
361 if (!IsValidScheme(test
))
364 return scheme_
== "*" || test
== scheme_
;
// Convenience overload taking a bare host name. It builds a synthetic URL
// string that starts with the http scheme and the standard scheme separator,
// then delegates to MatchesHost(const GURL&).
// NOTE(review): the lines that append |host| to the synthetic URL are not
// visible here -- confirm against the full file.
367 bool URLPattern::MatchesHost(const std::string
& host
) const {
368 std::string
test(content::kHttpScheme
);
369 test
+= content::kStandardSchemeSeparator
;
372 return MatchesHost(GURL(test
));
// Tests whether the host of |test| is matched by this pattern's host_.
//
// Checks, in order: exact equality; "all hosts" (subdomain matching on with an
// empty host_); then, only when subdomain matching is on and the test host is
// not an IP address, whether the test host ends in host_ with a '.'
// immediately before that suffix.
// NOTE(review): the return statements of the individual checks are not visible
// in the lines shown here; their outcomes are inferred from the comments.
375 bool URLPattern::MatchesHost(const GURL
& test
) const {
376 // If the hosts are exactly equal, we have a match.
377 if (test
.host() == host_
)
380 // If we're matching subdomains, and we have no host in the match pattern,
381 // that means that we're matching all hosts, which means we have a match no
382 // matter what the test host is.
383 if (match_subdomains_
&& host_
.empty())
386 // Otherwise, we can only match if our match pattern matches subdomains.
387 if (!match_subdomains_
)
390 // We don't do subdomain matching against IP addresses, so we can give up now
391 // if the test host is an IP address.
392 if (test
.HostIsIPAddress())
395 // Check if the test host is a subdomain of our host.
// A proper subdomain needs at least one character plus a '.' in front of
// host_, so the test host must be longer than host_.length() + 1. This check
// also keeps the index arithmetic below from underflowing.
396 if (test
.host().length() <= (host_
.length() + 1))
// Compare the tail of the test host against host_.
399 if (test
.host().compare(test
.host().length() - host_
.length(),
400 host_
.length(), host_
) != 0)
// The character just before the matched suffix must be a label separator, so
// that e.g. "badexample.com" is not treated as a subdomain of "example.com".
403 return test
.host()[test
.host().length() - host_
.length() - 1] == '.';
// Tests whether the path |test| is matched by this pattern's path. A path
// equal to the pattern path minus its trailing "/*" is special-cased; all
// other paths are matched against path_escaped_ with MatchPattern().
// NOTE(review): the statement taken by the special case is not visible here;
// presumably it returns true -- confirm.
406 bool URLPattern::MatchesPath(const std::string
& test
) const {
407 // Make the behaviour of OverlapsWith consistent with MatchesURL, which is
408 // needed so that hosted apps on e.g. 'google.com' also run on 'google.com/'.
409 if (test
+ "/*" == path_escaped_
)
412 return MatchPattern(test
, path_escaped_
);
// Returns the pattern's string form by reference. The comparison operators
// (<, >, ==) are defined entirely in terms of this string.
//
// A match-all pattern is rendered as kAllUrlsPattern. Otherwise the string
// starts with scheme_ followed by the standard scheme separator for standard
// schemes or ":" for the rest; a host section is added only for standard,
// non-file schemes.
// NOTE(review): most of this function (the host/port/path assembly, how the
// result is stored in spec_, and the return statements) is not visible in the
// lines shown here -- confirm against the full file.
415 const std::string
& URLPattern::GetAsString() const {
419 if (match_all_urls_
) {
420 spec_
= kAllUrlsPattern
;
424 bool standard_scheme
= IsStandardScheme(scheme_
);
426 std::string spec
= scheme_
+
427 (standard_scheme
? content::kStandardSchemeSeparator
: ":");
429 if (scheme_
!= content::kFileScheme
&& standard_scheme
) {
430 if (match_subdomains_
) {
// Tests whether this pattern and |other| can match a common URL.
//
// Match-all on either side is special-cased. Otherwise the patterns overlap
// when, for each of scheme, host, port and path, at least one side matches the
// other side's value. Paths are compared with their trailing wildcard removed
// (see StripTrailingWildcard() for the assumption this relies on).
// NOTE(review): the statement taken by the match-all check is not visible
// here; presumably it returns true -- confirm.
452 bool URLPattern::OverlapsWith(const URLPattern
& other
) const {
453 if (match_all_urls() || other
.match_all_urls())
455 return (MatchesAnyScheme(other
.GetExplicitSchemes()) ||
456 other
.MatchesAnyScheme(GetExplicitSchemes()))
457 && (MatchesHost(other
.host()) || other
.MatchesHost(host()))
458 && (MatchesPortPattern(other
.port()) || other
.MatchesPortPattern(port()))
459 && (MatchesPath(StripTrailingWildcard(other
.path())) ||
460 other
.MatchesPath(StripTrailingWildcard(path())));
// Tests whether this pattern covers everything |other| covers.
//
// Match-all on this side is special-cased. Otherwise this pattern must match
// every explicit scheme of |other| as well as |other|'s host, port and path
// (with the trailing wildcard removed). Unlike OverlapsWith(), the checks run
// in one direction only.
// NOTE(review): the statement taken by the match-all check is not visible
// here; presumably it returns true -- confirm.
463 bool URLPattern::Contains(const URLPattern
& other
) const {
464 if (match_all_urls())
466 return MatchesAllSchemes(other
.GetExplicitSchemes())
467 && MatchesHost(other
.host())
468 && MatchesPortPattern(other
.port())
469 && MatchesPath(StripTrailingWildcard(other
.path()));
// Walks |schemes| and tests each entry with MatchesScheme(); used by
// OverlapsWith() to ask whether at least one scheme is matched.
// NOTE(review): the return statements are not visible in the lines shown
// here; presumably the first match returns true and an exhausted (or empty)
// list returns false -- confirm.
472 bool URLPattern::MatchesAnyScheme(
473 const std::vector
<std::string
>& schemes
) const {
474 for (std::vector
<std::string
>::const_iterator i
= schemes
.begin();
475 i
!= schemes
.end(); ++i
) {
476 if (MatchesScheme(*i
))
// Walks |schemes| and tests each entry with MatchesScheme(); used by
// Contains() to ask whether every scheme is matched.
// NOTE(review): the return statements are not visible in the lines shown
// here; presumably the first mismatch returns false and an exhausted (or
// empty) list returns true -- confirm.
483 bool URLPattern::MatchesAllSchemes(
484 const std::vector
<std::string
>& schemes
) const {
485 for (std::vector
<std::string
>::const_iterator i
= schemes
.begin();
486 i
!= schemes
.end(); ++i
) {
487 if (!MatchesScheme(*i
))
// Shared host/port check behind MatchesURL() and MatchesSecurityOrigin(); the
// scheme is checked by those callers. The host is compared only for non-file
// patterns, and the port is compared using the decimal string form of
// |test|'s EffectiveIntPort().
// NOTE(review): the return statements are not visible in the lines shown
// here; presumably a failed check returns false and otherwise the function
// returns true -- confirm.
494 bool URLPattern::MatchesSecurityOriginHelper(const GURL
& test
) const {
495 // Ignore hostname if scheme is file://.
496 if (scheme_
!= content::kFileScheme
&& !MatchesHost(test
))
499 if (!MatchesPortPattern(base::IntToString(test
.EffectiveIntPort())))
// Tests |port| against this pattern's port: a pattern port of "*" matches any
// value; otherwise the two strings must be identical (textual comparison, so
// no numeric normalisation is applied).
505 bool URLPattern::MatchesPortPattern(const std::string
& port
) const {
506 return port_
== "*" || port_
== port
;
// Returns the concrete scheme names this pattern can match.
//
// A non-wildcard, non-match-all pattern whose scheme passes IsValidScheme()
// contributes just that scheme. The loop collects every entry of
// kValidSchemes accepted by MatchesScheme(), which expands a wildcard scheme
// into explicit ones.
// NOTE(review): the control flow between the two parts (an early return after
// the first branch, presumably) is not visible in the lines shown here.
509 std::vector
<std::string
> URLPattern::GetExplicitSchemes() const {
510 std::vector
<std::string
> result
;
512 if (scheme_
!= "*" && !match_all_urls_
&& IsValidScheme(scheme_
)) {
513 result
.push_back(scheme_
);
517 for (size_t i
= 0; i
< arraysize(kValidSchemes
); ++i
) {
518 if (MatchesScheme(kValidSchemes
[i
])) {
519 result
.push_back(kValidSchemes
[i
]);
// Produces one copy of this pattern per scheme returned by
// GetExplicitSchemes(), each with match-all mode switched off.
// NOTE(review): the statement that gives each copy its individual scheme, and
// the final return, are not visible in the lines shown here -- confirm.
526 std::vector
<URLPattern
> URLPattern::ConvertToExplicitSchemes() const {
527 std::vector
<std::string
> explicit_schemes
= GetExplicitSchemes();
528 std::vector
<URLPattern
> result
;
530 for (std::vector
<std::string
>::const_iterator i
= explicit_schemes
.begin();
531 i
!= explicit_schemes
.end(); ++i
) {
// Start from a full copy so host, port and path carry over unchanged.
532 URLPattern temp
= *this;
534 temp
.SetMatchAllURLs(false);
535 result
.push_back(temp
);
// Returns the human-readable message for |parse_result| by indexing
// kParseResultMessages with the enum value. No range check is performed, so
// |parse_result| must be a valid ParseResult below NUM_PARSE_RESULTS.
542 const char* URLPattern::GetParseResultString(
543 URLPattern::ParseResult parse_result
) {
544 return kParseResultMessages
[parse_result
];