Don't strip scheme from parameter when running external protocol handler
[chromium-blink-merge.git] / url / gurl.cc
blobc22236f89e533129d935f22f5f714b2109125fea
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifdef WIN32
6 #include <windows.h>
7 #else
8 #include <pthread.h>
9 #endif
11 #include <algorithm>
12 #include <ostream>
14 #include "url/gurl.h"
16 #include "base/logging.h"
17 #include "base/strings/string_piece.h"
18 #include "base/strings/string_util.h"
19 #include "url/url_canon_stdstring.h"
20 #include "url/url_util.h"
22 namespace {
24 static std::string* empty_string = NULL;
25 static GURL* empty_gurl = NULL;
27 #ifdef WIN32
29 // Returns a static reference to an empty string for returning a reference
30 // when there is no underlying string.
31 const std::string& EmptyStringForGURL() {
32 // Avoid static object construction/destruction on startup/shutdown.
33 if (!empty_string) {
34 // Create the string. Be careful that we don't break in the case that this
35 // is being called from multiple threads. Statics are not threadsafe.
36 std::string* new_empty_string = new std::string;
37 if (InterlockedCompareExchangePointer(
38 reinterpret_cast<PVOID*>(&empty_string), new_empty_string, NULL)) {
39 // The old value was non-NULL, so no replacement was done. Another
40 // thread did the initialization out from under us.
41 delete new_empty_string;
44 return *empty_string;
47 #else
49 static pthread_once_t empty_string_once = PTHREAD_ONCE_INIT;
50 static pthread_once_t empty_gurl_once = PTHREAD_ONCE_INIT;
52 void EmptyStringForGURLOnce(void) {
53 empty_string = new std::string;
56 const std::string& EmptyStringForGURL() {
57 // Avoid static object construction/destruction on startup/shutdown.
58 pthread_once(&empty_string_once, EmptyStringForGURLOnce);
59 return *empty_string;
62 #endif // WIN32
64 } // namespace
66 GURL::GURL() : is_valid_(false) {
69 GURL::GURL(const GURL& other)
70 : spec_(other.spec_),
71 is_valid_(other.is_valid_),
72 parsed_(other.parsed_) {
73 if (other.inner_url_)
74 inner_url_.reset(new GURL(*other.inner_url_));
75 // Valid filesystem urls should always have an inner_url_.
76 DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_);
79 GURL::GURL(const std::string& url_string) {
80 InitCanonical(url_string, true);
83 GURL::GURL(const base::string16& url_string) {
84 InitCanonical(url_string, true);
87 GURL::GURL(const std::string& url_string, RetainWhiteSpaceSelector) {
88 InitCanonical(url_string, false);
91 GURL::GURL(const char* canonical_spec,
92 size_t canonical_spec_len,
93 const url::Parsed& parsed,
94 bool is_valid)
95 : spec_(canonical_spec, canonical_spec_len),
96 is_valid_(is_valid),
97 parsed_(parsed) {
98 InitializeFromCanonicalSpec();
101 GURL::GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid)
102 : is_valid_(is_valid),
103 parsed_(parsed) {
104 spec_.swap(canonical_spec);
105 InitializeFromCanonicalSpec();
108 template<typename STR>
109 void GURL::InitCanonical(const STR& input_spec, bool trim_path_end) {
110 // Reserve enough room in the output for the input, plus some extra so that
111 // we have room if we have to escape a few things without reallocating.
112 spec_.reserve(input_spec.size() + 32);
113 url::StdStringCanonOutput output(&spec_);
114 is_valid_ = url::Canonicalize(
115 input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end,
116 NULL, &output, &parsed_);
118 output.Complete(); // Must be done before using string.
119 if (is_valid_ && SchemeIsFileSystem()) {
120 inner_url_.reset(new GURL(spec_.data(), parsed_.Length(),
121 *parsed_.inner_parsed(), true));
125 void GURL::InitializeFromCanonicalSpec() {
126 if (is_valid_ && SchemeIsFileSystem()) {
127 inner_url_.reset(
128 new GURL(spec_.data(), parsed_.Length(),
129 *parsed_.inner_parsed(), true));
132 #ifndef NDEBUG
133 // For testing purposes, check that the parsed canonical URL is identical to
134 // what we would have produced. Skip checking for invalid URLs have no meaning
135 // and we can't always canonicalize then reproducibly.
136 if (is_valid_) {
137 url::Component scheme;
138 // We can't do this check on the inner_url of a filesystem URL, as
139 // canonical_spec actually points to the start of the outer URL, so we'd
140 // end up with infinite recursion in this constructor.
141 if (!url::FindAndCompareScheme(spec_.data(), spec_.length(),
142 url::kFileSystemScheme, &scheme) ||
143 scheme.begin == parsed_.scheme.begin) {
144 // We need to retain trailing whitespace on path URLs, as the |parsed_|
145 // spec we originally received may legitimately contain trailing white-
146 // space on the path or components e.g. if the #ref has been
147 // removed from a "foo:hello #ref" URL (see http://crbug.com/291747).
148 GURL test_url(spec_, RETAIN_TRAILING_PATH_WHITEPACE);
150 DCHECK(test_url.is_valid_ == is_valid_);
151 DCHECK(test_url.spec_ == spec_);
153 DCHECK(test_url.parsed_.scheme == parsed_.scheme);
154 DCHECK(test_url.parsed_.username == parsed_.username);
155 DCHECK(test_url.parsed_.password == parsed_.password);
156 DCHECK(test_url.parsed_.host == parsed_.host);
157 DCHECK(test_url.parsed_.port == parsed_.port);
158 DCHECK(test_url.parsed_.path == parsed_.path);
159 DCHECK(test_url.parsed_.query == parsed_.query);
160 DCHECK(test_url.parsed_.ref == parsed_.ref);
163 #endif
166 GURL::~GURL() {
169 GURL& GURL::operator=(GURL other) {
170 Swap(&other);
171 return *this;
174 const std::string& GURL::spec() const {
175 if (is_valid_ || spec_.empty())
176 return spec_;
178 DCHECK(false) << "Trying to get the spec of an invalid URL!";
179 return EmptyStringForGURL();
182 bool GURL::operator==(const GURL& other) const {
183 return spec_ == other.spec_;
186 bool GURL::operator!=(const GURL& other) const {
187 return spec_ != other.spec_;
190 bool GURL::operator<(const GURL& other) const {
191 return spec_ < other.spec_;
194 bool GURL::operator>(const GURL& other) const {
195 return spec_ > other.spec_;
198 // Note: code duplicated below (it's inconvenient to use a template here).
199 GURL GURL::Resolve(const std::string& relative) const {
200 // Not allowed for invalid URLs.
201 if (!is_valid_)
202 return GURL();
204 GURL result;
206 // Reserve enough room in the output for the input, plus some extra so that
207 // we have room if we have to escape a few things without reallocating.
208 result.spec_.reserve(spec_.size() + 32);
209 url::StdStringCanonOutput output(&result.spec_);
211 if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
212 parsed_, relative.data(),
213 static_cast<int>(relative.length()),
214 nullptr, &output, &result.parsed_)) {
215 // Error resolving, return an empty URL.
216 return GURL();
219 output.Complete();
220 result.is_valid_ = true;
221 if (result.SchemeIsFileSystem()) {
222 result.inner_url_.reset(
223 new GURL(result.spec_.data(), result.parsed_.Length(),
224 *result.parsed_.inner_parsed(), true));
226 return result;
229 // Note: code duplicated above (it's inconvenient to use a template here).
230 GURL GURL::Resolve(const base::string16& relative) const {
231 // Not allowed for invalid URLs.
232 if (!is_valid_)
233 return GURL();
235 GURL result;
237 // Reserve enough room in the output for the input, plus some extra so that
238 // we have room if we have to escape a few things without reallocating.
239 result.spec_.reserve(spec_.size() + 32);
240 url::StdStringCanonOutput output(&result.spec_);
242 if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
243 parsed_, relative.data(),
244 static_cast<int>(relative.length()),
245 nullptr, &output, &result.parsed_)) {
246 // Error resolving, return an empty URL.
247 return GURL();
250 output.Complete();
251 result.is_valid_ = true;
252 if (result.SchemeIsFileSystem()) {
253 result.inner_url_.reset(
254 new GURL(result.spec_.data(), result.parsed_.Length(),
255 *result.parsed_.inner_parsed(), true));
257 return result;
260 // Note: code duplicated below (it's inconvenient to use a template here).
261 GURL GURL::ReplaceComponents(
262 const url::Replacements<char>& replacements) const {
263 GURL result;
265 // Not allowed for invalid URLs.
266 if (!is_valid_)
267 return GURL();
269 // Reserve enough room in the output for the input, plus some extra so that
270 // we have room if we have to escape a few things without reallocating.
271 result.spec_.reserve(spec_.size() + 32);
272 url::StdStringCanonOutput output(&result.spec_);
274 result.is_valid_ = url::ReplaceComponents(
275 spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
276 NULL, &output, &result.parsed_);
278 output.Complete();
279 if (result.is_valid_ && result.SchemeIsFileSystem()) {
280 result.inner_url_.reset(new GURL(spec_.data(), result.parsed_.Length(),
281 *result.parsed_.inner_parsed(), true));
283 return result;
286 // Note: code duplicated above (it's inconvenient to use a template here).
287 GURL GURL::ReplaceComponents(
288 const url::Replacements<base::char16>& replacements) const {
289 GURL result;
291 // Not allowed for invalid URLs.
292 if (!is_valid_)
293 return GURL();
295 // Reserve enough room in the output for the input, plus some extra so that
296 // we have room if we have to escape a few things without reallocating.
297 result.spec_.reserve(spec_.size() + 32);
298 url::StdStringCanonOutput output(&result.spec_);
300 result.is_valid_ = url::ReplaceComponents(
301 spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
302 NULL, &output, &result.parsed_);
304 output.Complete();
305 if (result.is_valid_ && result.SchemeIsFileSystem()) {
306 result.inner_url_.reset(new GURL(spec_.data(), result.parsed_.Length(),
307 *result.parsed_.inner_parsed(), true));
309 return result;
312 GURL GURL::GetOrigin() const {
313 // This doesn't make sense for invalid or nonstandard URLs, so return
314 // the empty URL.
315 if (!is_valid_ || !IsStandard())
316 return GURL();
318 if (SchemeIsFileSystem())
319 return inner_url_->GetOrigin();
321 url::Replacements<char> replacements;
322 replacements.ClearUsername();
323 replacements.ClearPassword();
324 replacements.ClearPath();
325 replacements.ClearQuery();
326 replacements.ClearRef();
328 return ReplaceComponents(replacements);
331 GURL GURL::GetAsReferrer() const {
332 if (!is_valid_ || !SchemeIsHTTPOrHTTPS())
333 return GURL();
335 if (!has_ref() && !has_username() && !has_password())
336 return GURL(*this);
338 url::Replacements<char> replacements;
339 replacements.ClearRef();
340 replacements.ClearUsername();
341 replacements.ClearPassword();
342 return ReplaceComponents(replacements);
345 GURL GURL::GetWithEmptyPath() const {
346 // This doesn't make sense for invalid or nonstandard URLs, so return
347 // the empty URL.
348 if (!is_valid_ || !IsStandard())
349 return GURL();
351 // We could optimize this since we know that the URL is canonical, and we are
352 // appending a canonical path, so avoiding re-parsing.
353 GURL other(*this);
354 if (parsed_.path.len == 0)
355 return other;
357 // Clear everything after the path.
358 other.parsed_.query.reset();
359 other.parsed_.ref.reset();
361 // Set the path, since the path is longer than one, we can just set the
362 // first character and resize.
363 other.spec_[other.parsed_.path.begin] = '/';
364 other.parsed_.path.len = 1;
365 other.spec_.resize(other.parsed_.path.begin + 1);
366 return other;
369 bool GURL::IsStandard() const {
370 return url::IsStandard(spec_.data(), parsed_.scheme);
373 bool GURL::SchemeIs(const char* lower_ascii_scheme) const {
374 if (parsed_.scheme.len <= 0)
375 return lower_ascii_scheme == NULL;
376 return base::LowerCaseEqualsASCII(
377 base::StringPiece(spec_.data() + parsed_.scheme.begin,
378 parsed_.scheme.len),
379 lower_ascii_scheme);
382 bool GURL::SchemeIsHTTPOrHTTPS() const {
383 return SchemeIs(url::kHttpScheme) || SchemeIs(url::kHttpsScheme);
386 bool GURL::SchemeIsWSOrWSS() const {
387 return SchemeIs(url::kWsScheme) || SchemeIs(url::kWssScheme);
390 int GURL::IntPort() const {
391 if (parsed_.port.is_nonempty())
392 return url::ParsePort(spec_.data(), parsed_.port);
393 return url::PORT_UNSPECIFIED;
396 int GURL::EffectiveIntPort() const {
397 int int_port = IntPort();
398 if (int_port == url::PORT_UNSPECIFIED && IsStandard())
399 return url::DefaultPortForScheme(spec_.data() + parsed_.scheme.begin,
400 parsed_.scheme.len);
401 return int_port;
404 std::string GURL::ExtractFileName() const {
405 url::Component file_component;
406 url::ExtractFileName(spec_.data(), parsed_.path, &file_component);
407 return ComponentString(file_component);
410 std::string GURL::PathForRequest() const {
411 DCHECK(parsed_.path.len > 0)
412 << "Canonical path for requests should be non-empty";
413 if (parsed_.ref.len >= 0) {
414 // Clip off the reference when it exists. The reference starts after the
415 // #-sign, so we have to subtract one to also remove it.
416 return std::string(spec_, parsed_.path.begin,
417 parsed_.ref.begin - parsed_.path.begin - 1);
419 // Compute the actual path length, rather than depending on the spec's
420 // terminator. If we're an inner_url, our spec continues on into our outer
421 // URL's path/query/ref.
422 int path_len = parsed_.path.len;
423 if (parsed_.query.is_valid())
424 path_len = parsed_.query.end() - parsed_.path.begin;
426 return std::string(spec_, parsed_.path.begin, path_len);
429 std::string GURL::HostNoBrackets() const {
430 // If host looks like an IPv6 literal, strip the square brackets.
431 url::Component h(parsed_.host);
432 if (h.len >= 2 && spec_[h.begin] == '[' && spec_[h.end() - 1] == ']') {
433 h.begin++;
434 h.len -= 2;
436 return ComponentString(h);
439 std::string GURL::GetContent() const {
440 return is_valid_ ? ComponentString(parsed_.GetContent()) : std::string();
443 bool GURL::HostIsIPAddress() const {
444 if (!is_valid_ || spec_.empty())
445 return false;
447 url::RawCanonOutputT<char, 128> ignored_output;
448 url::CanonHostInfo host_info;
449 url::CanonicalizeIPAddress(spec_.c_str(), parsed_.host, &ignored_output,
450 &host_info);
451 return host_info.IsIPAddress();
454 #ifdef WIN32
456 const GURL& GURL::EmptyGURL() {
457 // Avoid static object construction/destruction on startup/shutdown.
458 if (!empty_gurl) {
459 // Create the string. Be careful that we don't break in the case that this
460 // is being called from multiple threads.
461 GURL* new_empty_gurl = new GURL;
462 if (InterlockedCompareExchangePointer(
463 reinterpret_cast<PVOID*>(&empty_gurl), new_empty_gurl, NULL)) {
464 // The old value was non-NULL, so no replacement was done. Another
465 // thread did the initialization out from under us.
466 delete new_empty_gurl;
469 return *empty_gurl;
472 #else
474 void EmptyGURLOnce(void) {
475 empty_gurl = new GURL;
478 const GURL& GURL::EmptyGURL() {
479 // Avoid static object construction/destruction on startup/shutdown.
480 pthread_once(&empty_gurl_once, EmptyGURLOnce);
481 return *empty_gurl;
484 #endif // WIN32
486 bool GURL::DomainIs(base::StringPiece lower_ascii_domain) const {
487 if (!is_valid_ || lower_ascii_domain.empty())
488 return false;
490 // FileSystem URLs have empty parsed_.host, so check this first.
491 if (SchemeIsFileSystem() && inner_url_)
492 return inner_url_->DomainIs(lower_ascii_domain);
494 if (!parsed_.host.is_nonempty())
495 return false;
497 // If the host name ends with a dot but the input domain doesn't,
498 // then we ignore the dot in the host name.
499 const char* host_last_pos = spec_.data() + parsed_.host.end() - 1;
500 int host_len = parsed_.host.len;
501 int domain_len = lower_ascii_domain.length();
502 if ('.' == *host_last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
503 host_last_pos--;
504 host_len--;
507 if (host_len < domain_len)
508 return false;
510 // |host_first_pos| is the start of the compared part of the host name, not
511 // start of the whole host name.
512 const char* host_first_pos = spec_.data() + parsed_.host.begin +
513 host_len - domain_len;
515 if (!base::LowerCaseEqualsASCII(
516 base::StringPiece(host_first_pos, domain_len), lower_ascii_domain))
517 return false;
519 // Make sure there aren't extra characters in host before the compared part;
520 // if the host name is longer than the input domain name, then the character
521 // immediately before the compared part should be a dot. For example,
522 // www.google.com has domain "google.com", but www.iamnotgoogle.com does not.
523 if ('.' != lower_ascii_domain[0] && host_len > domain_len &&
524 '.' != *(host_first_pos - 1))
525 return false;
527 return true;
530 void GURL::Swap(GURL* other) {
531 spec_.swap(other->spec_);
532 std::swap(is_valid_, other->is_valid_);
533 std::swap(parsed_, other->parsed_);
534 inner_url_.swap(other->inner_url_);
537 std::ostream& operator<<(std::ostream& out, const GURL& url) {
538 return out << url.possibly_invalid_spec();