Pin Chrome's shortcut to the Win10 Start menu on install and OS upgrade.
[chromium-blink-merge.git] / url / gurl.cc
blob2547a953a29cb0232dec123b10b67f15bc190a31
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifdef WIN32
6 #include <windows.h>
7 #else
8 #include <pthread.h>
9 #endif
11 #include <algorithm>
12 #include <ostream>
14 #include "url/gurl.h"
16 #include "base/logging.h"
17 #include "base/strings/string_util.h"
18 #include "url/url_canon_stdstring.h"
19 #include "url/url_util.h"
21 namespace {
23 static std::string* empty_string = NULL;
24 static GURL* empty_gurl = NULL;
26 #ifdef WIN32
28 // Returns a static reference to an empty string for returning a reference
29 // when there is no underlying string.
30 const std::string& EmptyStringForGURL() {
31 // Avoid static object construction/destruction on startup/shutdown.
32 if (!empty_string) {
33 // Create the string. Be careful that we don't break in the case that this
34 // is being called from multiple threads. Statics are not threadsafe.
35 std::string* new_empty_string = new std::string;
36 if (InterlockedCompareExchangePointer(
37 reinterpret_cast<PVOID*>(&empty_string), new_empty_string, NULL)) {
38 // The old value was non-NULL, so no replacement was done. Another
39 // thread did the initialization out from under us.
40 delete new_empty_string;
43 return *empty_string;
46 #else
48 static pthread_once_t empty_string_once = PTHREAD_ONCE_INIT;
49 static pthread_once_t empty_gurl_once = PTHREAD_ONCE_INIT;
51 void EmptyStringForGURLOnce(void) {
52 empty_string = new std::string;
55 const std::string& EmptyStringForGURL() {
56 // Avoid static object construction/destruction on startup/shutdown.
57 pthread_once(&empty_string_once, EmptyStringForGURLOnce);
58 return *empty_string;
61 #endif // WIN32
63 } // namespace
65 GURL::GURL() : is_valid_(false) {
68 GURL::GURL(const GURL& other)
69 : spec_(other.spec_),
70 is_valid_(other.is_valid_),
71 parsed_(other.parsed_) {
72 if (other.inner_url_)
73 inner_url_.reset(new GURL(*other.inner_url_));
74 // Valid filesystem urls should always have an inner_url_.
75 DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_);
78 GURL::GURL(const std::string& url_string) {
79 InitCanonical(url_string, true);
82 GURL::GURL(const base::string16& url_string) {
83 InitCanonical(url_string, true);
86 GURL::GURL(const std::string& url_string, RetainWhiteSpaceSelector) {
87 InitCanonical(url_string, false);
90 GURL::GURL(const char* canonical_spec,
91 size_t canonical_spec_len,
92 const url::Parsed& parsed,
93 bool is_valid)
94 : spec_(canonical_spec, canonical_spec_len),
95 is_valid_(is_valid),
96 parsed_(parsed) {
97 InitializeFromCanonicalSpec();
100 GURL::GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid)
101 : is_valid_(is_valid),
102 parsed_(parsed) {
103 spec_.swap(canonical_spec);
104 InitializeFromCanonicalSpec();
107 template<typename STR>
108 void GURL::InitCanonical(const STR& input_spec, bool trim_path_end) {
109 // Reserve enough room in the output for the input, plus some extra so that
110 // we have room if we have to escape a few things without reallocating.
111 spec_.reserve(input_spec.size() + 32);
112 url::StdStringCanonOutput output(&spec_);
113 is_valid_ = url::Canonicalize(
114 input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end,
115 NULL, &output, &parsed_);
117 output.Complete(); // Must be done before using string.
118 if (is_valid_ && SchemeIsFileSystem()) {
119 inner_url_.reset(new GURL(spec_.data(), parsed_.Length(),
120 *parsed_.inner_parsed(), true));
124 void GURL::InitializeFromCanonicalSpec() {
125 if (is_valid_ && SchemeIsFileSystem()) {
126 inner_url_.reset(
127 new GURL(spec_.data(), parsed_.Length(),
128 *parsed_.inner_parsed(), true));
131 #ifndef NDEBUG
132 // For testing purposes, check that the parsed canonical URL is identical to
133 // what we would have produced. Skip checking for invalid URLs have no meaning
134 // and we can't always canonicalize then reproducabely.
135 if (is_valid_) {
136 url::Component scheme;
137 // We can't do this check on the inner_url of a filesystem URL, as
138 // canonical_spec actually points to the start of the outer URL, so we'd
139 // end up with infinite recursion in this constructor.
140 if (!url::FindAndCompareScheme(spec_.data(), spec_.length(),
141 url::kFileSystemScheme, &scheme) ||
142 scheme.begin == parsed_.scheme.begin) {
143 // We need to retain trailing whitespace on path URLs, as the |parsed_|
144 // spec we originally received may legitimately contain trailing white-
145 // space on the path or components e.g. if the #ref has been
146 // removed from a "foo:hello #ref" URL (see http://crbug.com/291747).
147 GURL test_url(spec_, RETAIN_TRAILING_PATH_WHITEPACE);
149 DCHECK(test_url.is_valid_ == is_valid_);
150 DCHECK(test_url.spec_ == spec_);
152 DCHECK(test_url.parsed_.scheme == parsed_.scheme);
153 DCHECK(test_url.parsed_.username == parsed_.username);
154 DCHECK(test_url.parsed_.password == parsed_.password);
155 DCHECK(test_url.parsed_.host == parsed_.host);
156 DCHECK(test_url.parsed_.port == parsed_.port);
157 DCHECK(test_url.parsed_.path == parsed_.path);
158 DCHECK(test_url.parsed_.query == parsed_.query);
159 DCHECK(test_url.parsed_.ref == parsed_.ref);
162 #endif
165 GURL::~GURL() {
168 GURL& GURL::operator=(GURL other) {
169 Swap(&other);
170 return *this;
173 const std::string& GURL::spec() const {
174 if (is_valid_ || spec_.empty())
175 return spec_;
177 DCHECK(false) << "Trying to get the spec of an invalid URL!";
178 return EmptyStringForGURL();
181 bool GURL::operator==(const GURL& other) const {
182 return spec_ == other.spec_;
185 bool GURL::operator!=(const GURL& other) const {
186 return spec_ != other.spec_;
189 bool GURL::operator<(const GURL& other) const {
190 return spec_ < other.spec_;
193 bool GURL::operator>(const GURL& other) const {
194 return spec_ > other.spec_;
197 // Note: code duplicated below (it's inconvenient to use a template here).
198 GURL GURL::Resolve(const std::string& relative) const {
199 // Not allowed for invalid URLs.
200 if (!is_valid_)
201 return GURL();
203 GURL result;
205 // Reserve enough room in the output for the input, plus some extra so that
206 // we have room if we have to escape a few things without reallocating.
207 result.spec_.reserve(spec_.size() + 32);
208 url::StdStringCanonOutput output(&result.spec_);
210 if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
211 parsed_, relative.data(),
212 static_cast<int>(relative.length()),
213 nullptr, &output, &result.parsed_)) {
214 // Error resolving, return an empty URL.
215 return GURL();
218 output.Complete();
219 result.is_valid_ = true;
220 if (result.SchemeIsFileSystem()) {
221 result.inner_url_.reset(
222 new GURL(result.spec_.data(), result.parsed_.Length(),
223 *result.parsed_.inner_parsed(), true));
225 return result;
228 // Note: code duplicated above (it's inconvenient to use a template here).
229 GURL GURL::Resolve(const base::string16& relative) const {
230 // Not allowed for invalid URLs.
231 if (!is_valid_)
232 return GURL();
234 GURL result;
236 // Reserve enough room in the output for the input, plus some extra so that
237 // we have room if we have to escape a few things without reallocating.
238 result.spec_.reserve(spec_.size() + 32);
239 url::StdStringCanonOutput output(&result.spec_);
241 if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
242 parsed_, relative.data(),
243 static_cast<int>(relative.length()),
244 nullptr, &output, &result.parsed_)) {
245 // Error resolving, return an empty URL.
246 return GURL();
249 output.Complete();
250 result.is_valid_ = true;
251 if (result.SchemeIsFileSystem()) {
252 result.inner_url_.reset(
253 new GURL(result.spec_.data(), result.parsed_.Length(),
254 *result.parsed_.inner_parsed(), true));
256 return result;
259 // Note: code duplicated below (it's inconvenient to use a template here).
260 GURL GURL::ReplaceComponents(
261 const url::Replacements<char>& replacements) const {
262 GURL result;
264 // Not allowed for invalid URLs.
265 if (!is_valid_)
266 return GURL();
268 // Reserve enough room in the output for the input, plus some extra so that
269 // we have room if we have to escape a few things without reallocating.
270 result.spec_.reserve(spec_.size() + 32);
271 url::StdStringCanonOutput output(&result.spec_);
273 result.is_valid_ = url::ReplaceComponents(
274 spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
275 NULL, &output, &result.parsed_);
277 output.Complete();
278 if (result.is_valid_ && result.SchemeIsFileSystem()) {
279 result.inner_url_.reset(new GURL(spec_.data(), result.parsed_.Length(),
280 *result.parsed_.inner_parsed(), true));
282 return result;
285 // Note: code duplicated above (it's inconvenient to use a template here).
286 GURL GURL::ReplaceComponents(
287 const url::Replacements<base::char16>& replacements) const {
288 GURL result;
290 // Not allowed for invalid URLs.
291 if (!is_valid_)
292 return GURL();
294 // Reserve enough room in the output for the input, plus some extra so that
295 // we have room if we have to escape a few things without reallocating.
296 result.spec_.reserve(spec_.size() + 32);
297 url::StdStringCanonOutput output(&result.spec_);
299 result.is_valid_ = url::ReplaceComponents(
300 spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
301 NULL, &output, &result.parsed_);
303 output.Complete();
304 if (result.is_valid_ && result.SchemeIsFileSystem()) {
305 result.inner_url_.reset(new GURL(spec_.data(), result.parsed_.Length(),
306 *result.parsed_.inner_parsed(), true));
308 return result;
311 GURL GURL::GetOrigin() const {
312 // This doesn't make sense for invalid or nonstandard URLs, so return
313 // the empty URL
314 if (!is_valid_ || !IsStandard())
315 return GURL();
317 if (SchemeIsFileSystem())
318 return inner_url_->GetOrigin();
320 url::Replacements<char> replacements;
321 replacements.ClearUsername();
322 replacements.ClearPassword();
323 replacements.ClearPath();
324 replacements.ClearQuery();
325 replacements.ClearRef();
327 return ReplaceComponents(replacements);
330 GURL GURL::GetAsReferrer() const {
331 if (!is_valid_ || !SchemeIsHTTPOrHTTPS())
332 return GURL();
334 if (!has_ref() && !has_username() && !has_password())
335 return GURL(*this);
337 url::Replacements<char> replacements;
338 replacements.ClearRef();
339 replacements.ClearUsername();
340 replacements.ClearPassword();
341 return ReplaceComponents(replacements);
344 GURL GURL::GetWithEmptyPath() const {
345 // This doesn't make sense for invalid or nonstandard URLs, so return
346 // the empty URL.
347 if (!is_valid_ || !IsStandard())
348 return GURL();
350 // We could optimize this since we know that the URL is canonical, and we are
351 // appending a canonical path, so avoiding re-parsing.
352 GURL other(*this);
353 if (parsed_.path.len == 0)
354 return other;
356 // Clear everything after the path.
357 other.parsed_.query.reset();
358 other.parsed_.ref.reset();
360 // Set the path, since the path is longer than one, we can just set the
361 // first character and resize.
362 other.spec_[other.parsed_.path.begin] = '/';
363 other.parsed_.path.len = 1;
364 other.spec_.resize(other.parsed_.path.begin + 1);
365 return other;
368 bool GURL::IsStandard() const {
369 return url::IsStandard(spec_.data(), parsed_.scheme);
372 bool GURL::SchemeIs(const char* lower_ascii_scheme) const {
373 if (parsed_.scheme.len <= 0)
374 return lower_ascii_scheme == NULL;
375 return base::LowerCaseEqualsASCII(
376 base::StringPiece(spec_.data() + parsed_.scheme.begin,
377 parsed_.scheme.len),
378 lower_ascii_scheme);
381 bool GURL::SchemeIsHTTPOrHTTPS() const {
382 return SchemeIs(url::kHttpScheme) || SchemeIs(url::kHttpsScheme);
385 bool GURL::SchemeIsWSOrWSS() const {
386 return SchemeIs(url::kWsScheme) || SchemeIs(url::kWssScheme);
389 int GURL::IntPort() const {
390 if (parsed_.port.is_nonempty())
391 return url::ParsePort(spec_.data(), parsed_.port);
392 return url::PORT_UNSPECIFIED;
395 int GURL::EffectiveIntPort() const {
396 int int_port = IntPort();
397 if (int_port == url::PORT_UNSPECIFIED && IsStandard())
398 return url::DefaultPortForScheme(spec_.data() + parsed_.scheme.begin,
399 parsed_.scheme.len);
400 return int_port;
403 std::string GURL::ExtractFileName() const {
404 url::Component file_component;
405 url::ExtractFileName(spec_.data(), parsed_.path, &file_component);
406 return ComponentString(file_component);
409 std::string GURL::PathForRequest() const {
410 DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty";
411 if (parsed_.ref.len >= 0) {
412 // Clip off the reference when it exists. The reference starts after the #
413 // sign, so we have to subtract one to also remove it.
414 return std::string(spec_, parsed_.path.begin,
415 parsed_.ref.begin - parsed_.path.begin - 1);
417 // Compute the actual path length, rather than depending on the spec's
418 // terminator. If we're an inner_url, our spec continues on into our outer
419 // url's path/query/ref.
420 int path_len = parsed_.path.len;
421 if (parsed_.query.is_valid())
422 path_len = parsed_.query.end() - parsed_.path.begin;
424 return std::string(spec_, parsed_.path.begin, path_len);
427 std::string GURL::HostNoBrackets() const {
428 // If host looks like an IPv6 literal, strip the square brackets.
429 url::Component h(parsed_.host);
430 if (h.len >= 2 && spec_[h.begin] == '[' && spec_[h.end() - 1] == ']') {
431 h.begin++;
432 h.len -= 2;
434 return ComponentString(h);
437 std::string GURL::GetContent() const {
438 return is_valid_ ? ComponentString(parsed_.GetContent()) : std::string();
441 bool GURL::HostIsIPAddress() const {
442 if (!is_valid_ || spec_.empty())
443 return false;
445 url::RawCanonOutputT<char, 128> ignored_output;
446 url::CanonHostInfo host_info;
447 url::CanonicalizeIPAddress(spec_.c_str(), parsed_.host, &ignored_output,
448 &host_info);
449 return host_info.IsIPAddress();
452 #ifdef WIN32
454 const GURL& GURL::EmptyGURL() {
455 // Avoid static object construction/destruction on startup/shutdown.
456 if (!empty_gurl) {
457 // Create the string. Be careful that we don't break in the case that this
458 // is being called from multiple threads.
459 GURL* new_empty_gurl = new GURL;
460 if (InterlockedCompareExchangePointer(
461 reinterpret_cast<PVOID*>(&empty_gurl), new_empty_gurl, NULL)) {
462 // The old value was non-NULL, so no replacement was done. Another
463 // thread did the initialization out from under us.
464 delete new_empty_gurl;
467 return *empty_gurl;
470 #else
472 void EmptyGURLOnce(void) {
473 empty_gurl = new GURL;
476 const GURL& GURL::EmptyGURL() {
477 // Avoid static object construction/destruction on startup/shutdown.
478 pthread_once(&empty_gurl_once, EmptyGURLOnce);
479 return *empty_gurl;
482 #endif // WIN32
484 bool GURL::DomainIs(const char* lower_ascii_domain,
485 int domain_len) const {
486 // Return false if this URL is not valid or domain is empty.
487 if (!is_valid_ || !domain_len)
488 return false;
490 // FileSystem URLs have empty parsed_.host, so check this first.
491 if (SchemeIsFileSystem() && inner_url_)
492 return inner_url_->DomainIs(lower_ascii_domain, domain_len);
494 if (!parsed_.host.is_nonempty())
495 return false;
497 // Check whether the host name is end with a dot. If yes, treat it
498 // the same as no-dot unless the input comparison domain is end
499 // with dot.
500 const char* last_pos = spec_.data() + parsed_.host.end() - 1;
501 int host_len = parsed_.host.len;
502 if ('.' == *last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
503 last_pos--;
504 host_len--;
507 // Return false if host's length is less than domain's length.
508 if (host_len < domain_len)
509 return false;
511 // Compare this url whether belong specific domain.
512 const char* start_pos = spec_.data() + parsed_.host.begin +
513 host_len - domain_len;
515 if (!base::LowerCaseEqualsASCII(
516 base::StringPiece(start_pos, last_pos - start_pos + 1),
517 base::StringPiece(lower_ascii_domain, domain_len)))
518 return false;
520 // Check whether host has right domain start with dot, make sure we got
521 // right domain range. For example www.google.com has domain
522 // "google.com" but www.iamnotgoogle.com does not.
523 if ('.' != lower_ascii_domain[0] && host_len > domain_len &&
524 '.' != *(start_pos - 1))
525 return false;
527 return true;
530 void GURL::Swap(GURL* other) {
531 spec_.swap(other->spec_);
532 std::swap(is_valid_, other->is_valid_);
533 std::swap(parsed_, other->parsed_);
534 inner_url_.swap(other->inner_url_);
537 std::ostream& operator<<(std::ostream& out, const GURL& url) {
538 return out << url.possibly_invalid_spec();