Add ICU message format support
[chromium-blink-merge.git] / url / gurl.cc
blob31e8c75d24fa0fcadaef7c37d208f8248c04e784
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifdef WIN32
6 #include <windows.h>
7 #else
8 #include <pthread.h>
9 #endif
11 #include <algorithm>
12 #include <ostream>
14 #include "url/gurl.h"
16 #include "base/logging.h"
17 #include "base/strings/string_piece.h"
18 #include "base/strings/string_util.h"
19 #include "url/url_canon_stdstring.h"
20 #include "url/url_util.h"
22 namespace {
24 static std::string* empty_string = NULL;
25 static GURL* empty_gurl = NULL;
27 #ifdef WIN32
29 // Returns a static reference to an empty string for returning a reference
30 // when there is no underlying string.
31 const std::string& EmptyStringForGURL() {
32 // Avoid static object construction/destruction on startup/shutdown.
33 if (!empty_string) {
34 // Create the string. Be careful that we don't break in the case that this
35 // is being called from multiple threads. Statics are not threadsafe.
36 std::string* new_empty_string = new std::string;
37 if (InterlockedCompareExchangePointer(
38 reinterpret_cast<PVOID*>(&empty_string), new_empty_string, NULL)) {
39 // The old value was non-NULL, so no replacement was done. Another
40 // thread did the initialization out from under us.
41 delete new_empty_string;
44 return *empty_string;
47 #else
49 static pthread_once_t empty_string_once = PTHREAD_ONCE_INIT;
50 static pthread_once_t empty_gurl_once = PTHREAD_ONCE_INIT;
52 void EmptyStringForGURLOnce(void) {
53 empty_string = new std::string;
56 const std::string& EmptyStringForGURL() {
57 // Avoid static object construction/destruction on startup/shutdown.
58 pthread_once(&empty_string_once, EmptyStringForGURLOnce);
59 return *empty_string;
62 #endif // WIN32
64 } // namespace
66 GURL::GURL() : is_valid_(false) {
69 GURL::GURL(const GURL& other)
70 : spec_(other.spec_),
71 is_valid_(other.is_valid_),
72 parsed_(other.parsed_) {
73 if (other.inner_url_)
74 inner_url_.reset(new GURL(*other.inner_url_));
75 // Valid filesystem urls should always have an inner_url_.
76 DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_);
79 GURL::GURL(const std::string& url_string) {
80 InitCanonical(url_string, true);
83 GURL::GURL(const base::string16& url_string) {
84 InitCanonical(url_string, true);
87 GURL::GURL(const std::string& url_string, RetainWhiteSpaceSelector) {
88 InitCanonical(url_string, false);
91 GURL::GURL(const char* canonical_spec,
92 size_t canonical_spec_len,
93 const url::Parsed& parsed,
94 bool is_valid)
95 : spec_(canonical_spec, canonical_spec_len),
96 is_valid_(is_valid),
97 parsed_(parsed) {
98 InitializeFromCanonicalSpec();
101 GURL::GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid)
102 : is_valid_(is_valid),
103 parsed_(parsed) {
104 spec_.swap(canonical_spec);
105 InitializeFromCanonicalSpec();
108 template<typename STR>
109 void GURL::InitCanonical(const STR& input_spec, bool trim_path_end) {
110 // Reserve enough room in the output for the input, plus some extra so that
111 // we have room if we have to escape a few things without reallocating.
112 spec_.reserve(input_spec.size() + 32);
113 url::StdStringCanonOutput output(&spec_);
114 is_valid_ = url::Canonicalize(
115 input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end,
116 NULL, &output, &parsed_);
118 output.Complete(); // Must be done before using string.
119 if (is_valid_ && SchemeIsFileSystem()) {
120 inner_url_.reset(new GURL(spec_.data(), parsed_.Length(),
121 *parsed_.inner_parsed(), true));
125 void GURL::InitializeFromCanonicalSpec() {
126 if (is_valid_ && SchemeIsFileSystem()) {
127 inner_url_.reset(
128 new GURL(spec_.data(), parsed_.Length(),
129 *parsed_.inner_parsed(), true));
132 #ifndef NDEBUG
133 // For testing purposes, check that the parsed canonical URL is identical to
134 // what we would have produced. Skip checking for invalid URLs have no meaning
135 // and we can't always canonicalize then reproducabely.
136 if (is_valid_) {
137 url::Component scheme;
138 // We can't do this check on the inner_url of a filesystem URL, as
139 // canonical_spec actually points to the start of the outer URL, so we'd
140 // end up with infinite recursion in this constructor.
141 if (!url::FindAndCompareScheme(spec_.data(), spec_.length(),
142 url::kFileSystemScheme, &scheme) ||
143 scheme.begin == parsed_.scheme.begin) {
144 // We need to retain trailing whitespace on path URLs, as the |parsed_|
145 // spec we originally received may legitimately contain trailing white-
146 // space on the path or components e.g. if the #ref has been
147 // removed from a "foo:hello #ref" URL (see http://crbug.com/291747).
148 GURL test_url(spec_, RETAIN_TRAILING_PATH_WHITEPACE);
150 DCHECK(test_url.is_valid_ == is_valid_);
151 DCHECK(test_url.spec_ == spec_);
153 DCHECK(test_url.parsed_.scheme == parsed_.scheme);
154 DCHECK(test_url.parsed_.username == parsed_.username);
155 DCHECK(test_url.parsed_.password == parsed_.password);
156 DCHECK(test_url.parsed_.host == parsed_.host);
157 DCHECK(test_url.parsed_.port == parsed_.port);
158 DCHECK(test_url.parsed_.path == parsed_.path);
159 DCHECK(test_url.parsed_.query == parsed_.query);
160 DCHECK(test_url.parsed_.ref == parsed_.ref);
163 #endif
166 GURL::~GURL() {
169 GURL& GURL::operator=(GURL other) {
170 Swap(&other);
171 return *this;
174 const std::string& GURL::spec() const {
175 if (is_valid_ || spec_.empty())
176 return spec_;
178 DCHECK(false) << "Trying to get the spec of an invalid URL!";
179 return EmptyStringForGURL();
182 bool GURL::operator==(const GURL& other) const {
183 return spec_ == other.spec_;
186 bool GURL::operator!=(const GURL& other) const {
187 return spec_ != other.spec_;
190 bool GURL::operator<(const GURL& other) const {
191 return spec_ < other.spec_;
194 bool GURL::operator>(const GURL& other) const {
195 return spec_ > other.spec_;
198 // Note: code duplicated below (it's inconvenient to use a template here).
199 GURL GURL::Resolve(const std::string& relative) const {
200 // Not allowed for invalid URLs.
201 if (!is_valid_)
202 return GURL();
204 GURL result;
206 // Reserve enough room in the output for the input, plus some extra so that
207 // we have room if we have to escape a few things without reallocating.
208 result.spec_.reserve(spec_.size() + 32);
209 url::StdStringCanonOutput output(&result.spec_);
211 if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
212 parsed_, relative.data(),
213 static_cast<int>(relative.length()),
214 nullptr, &output, &result.parsed_)) {
215 // Error resolving, return an empty URL.
216 return GURL();
219 output.Complete();
220 result.is_valid_ = true;
221 if (result.SchemeIsFileSystem()) {
222 result.inner_url_.reset(
223 new GURL(result.spec_.data(), result.parsed_.Length(),
224 *result.parsed_.inner_parsed(), true));
226 return result;
229 // Note: code duplicated above (it's inconvenient to use a template here).
230 GURL GURL::Resolve(const base::string16& relative) const {
231 // Not allowed for invalid URLs.
232 if (!is_valid_)
233 return GURL();
235 GURL result;
237 // Reserve enough room in the output for the input, plus some extra so that
238 // we have room if we have to escape a few things without reallocating.
239 result.spec_.reserve(spec_.size() + 32);
240 url::StdStringCanonOutput output(&result.spec_);
242 if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
243 parsed_, relative.data(),
244 static_cast<int>(relative.length()),
245 nullptr, &output, &result.parsed_)) {
246 // Error resolving, return an empty URL.
247 return GURL();
250 output.Complete();
251 result.is_valid_ = true;
252 if (result.SchemeIsFileSystem()) {
253 result.inner_url_.reset(
254 new GURL(result.spec_.data(), result.parsed_.Length(),
255 *result.parsed_.inner_parsed(), true));
257 return result;
260 // Note: code duplicated below (it's inconvenient to use a template here).
261 GURL GURL::ReplaceComponents(
262 const url::Replacements<char>& replacements) const {
263 GURL result;
265 // Not allowed for invalid URLs.
266 if (!is_valid_)
267 return GURL();
269 // Reserve enough room in the output for the input, plus some extra so that
270 // we have room if we have to escape a few things without reallocating.
271 result.spec_.reserve(spec_.size() + 32);
272 url::StdStringCanonOutput output(&result.spec_);
274 result.is_valid_ = url::ReplaceComponents(
275 spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
276 NULL, &output, &result.parsed_);
278 output.Complete();
279 if (result.is_valid_ && result.SchemeIsFileSystem()) {
280 result.inner_url_.reset(new GURL(spec_.data(), result.parsed_.Length(),
281 *result.parsed_.inner_parsed(), true));
283 return result;
286 // Note: code duplicated above (it's inconvenient to use a template here).
287 GURL GURL::ReplaceComponents(
288 const url::Replacements<base::char16>& replacements) const {
289 GURL result;
291 // Not allowed for invalid URLs.
292 if (!is_valid_)
293 return GURL();
295 // Reserve enough room in the output for the input, plus some extra so that
296 // we have room if we have to escape a few things without reallocating.
297 result.spec_.reserve(spec_.size() + 32);
298 url::StdStringCanonOutput output(&result.spec_);
300 result.is_valid_ = url::ReplaceComponents(
301 spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
302 NULL, &output, &result.parsed_);
304 output.Complete();
305 if (result.is_valid_ && result.SchemeIsFileSystem()) {
306 result.inner_url_.reset(new GURL(spec_.data(), result.parsed_.Length(),
307 *result.parsed_.inner_parsed(), true));
309 return result;
312 GURL GURL::GetOrigin() const {
313 // This doesn't make sense for invalid or nonstandard URLs, so return
314 // the empty URL
315 if (!is_valid_ || !IsStandard())
316 return GURL();
318 if (SchemeIsFileSystem())
319 return inner_url_->GetOrigin();
321 url::Replacements<char> replacements;
322 replacements.ClearUsername();
323 replacements.ClearPassword();
324 replacements.ClearPath();
325 replacements.ClearQuery();
326 replacements.ClearRef();
328 return ReplaceComponents(replacements);
331 GURL GURL::GetAsReferrer() const {
332 if (!is_valid_ || !SchemeIsHTTPOrHTTPS())
333 return GURL();
335 if (!has_ref() && !has_username() && !has_password())
336 return GURL(*this);
338 url::Replacements<char> replacements;
339 replacements.ClearRef();
340 replacements.ClearUsername();
341 replacements.ClearPassword();
342 return ReplaceComponents(replacements);
345 GURL GURL::GetWithEmptyPath() const {
346 // This doesn't make sense for invalid or nonstandard URLs, so return
347 // the empty URL.
348 if (!is_valid_ || !IsStandard())
349 return GURL();
351 // We could optimize this since we know that the URL is canonical, and we are
352 // appending a canonical path, so avoiding re-parsing.
353 GURL other(*this);
354 if (parsed_.path.len == 0)
355 return other;
357 // Clear everything after the path.
358 other.parsed_.query.reset();
359 other.parsed_.ref.reset();
361 // Set the path, since the path is longer than one, we can just set the
362 // first character and resize.
363 other.spec_[other.parsed_.path.begin] = '/';
364 other.parsed_.path.len = 1;
365 other.spec_.resize(other.parsed_.path.begin + 1);
366 return other;
369 bool GURL::IsStandard() const {
370 return url::IsStandard(spec_.data(), parsed_.scheme);
373 bool GURL::SchemeIs(const char* lower_ascii_scheme) const {
374 if (parsed_.scheme.len <= 0)
375 return lower_ascii_scheme == NULL;
376 return base::LowerCaseEqualsASCII(
377 base::StringPiece(spec_.data() + parsed_.scheme.begin,
378 parsed_.scheme.len),
379 lower_ascii_scheme);
382 bool GURL::SchemeIsHTTPOrHTTPS() const {
383 return SchemeIs(url::kHttpScheme) || SchemeIs(url::kHttpsScheme);
386 bool GURL::SchemeIsWSOrWSS() const {
387 return SchemeIs(url::kWsScheme) || SchemeIs(url::kWssScheme);
390 int GURL::IntPort() const {
391 if (parsed_.port.is_nonempty())
392 return url::ParsePort(spec_.data(), parsed_.port);
393 return url::PORT_UNSPECIFIED;
396 int GURL::EffectiveIntPort() const {
397 int int_port = IntPort();
398 if (int_port == url::PORT_UNSPECIFIED && IsStandard())
399 return url::DefaultPortForScheme(spec_.data() + parsed_.scheme.begin,
400 parsed_.scheme.len);
401 return int_port;
404 std::string GURL::ExtractFileName() const {
405 url::Component file_component;
406 url::ExtractFileName(spec_.data(), parsed_.path, &file_component);
407 return ComponentString(file_component);
410 std::string GURL::PathForRequest() const {
411 DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty";
412 if (parsed_.ref.len >= 0) {
413 // Clip off the reference when it exists. The reference starts after the #
414 // sign, so we have to subtract one to also remove it.
415 return std::string(spec_, parsed_.path.begin,
416 parsed_.ref.begin - parsed_.path.begin - 1);
418 // Compute the actual path length, rather than depending on the spec's
419 // terminator. If we're an inner_url, our spec continues on into our outer
420 // url's path/query/ref.
421 int path_len = parsed_.path.len;
422 if (parsed_.query.is_valid())
423 path_len = parsed_.query.end() - parsed_.path.begin;
425 return std::string(spec_, parsed_.path.begin, path_len);
428 std::string GURL::HostNoBrackets() const {
429 // If host looks like an IPv6 literal, strip the square brackets.
430 url::Component h(parsed_.host);
431 if (h.len >= 2 && spec_[h.begin] == '[' && spec_[h.end() - 1] == ']') {
432 h.begin++;
433 h.len -= 2;
435 return ComponentString(h);
438 std::string GURL::GetContent() const {
439 return is_valid_ ? ComponentString(parsed_.GetContent()) : std::string();
442 bool GURL::HostIsIPAddress() const {
443 if (!is_valid_ || spec_.empty())
444 return false;
446 url::RawCanonOutputT<char, 128> ignored_output;
447 url::CanonHostInfo host_info;
448 url::CanonicalizeIPAddress(spec_.c_str(), parsed_.host, &ignored_output,
449 &host_info);
450 return host_info.IsIPAddress();
453 #ifdef WIN32
455 const GURL& GURL::EmptyGURL() {
456 // Avoid static object construction/destruction on startup/shutdown.
457 if (!empty_gurl) {
458 // Create the string. Be careful that we don't break in the case that this
459 // is being called from multiple threads.
460 GURL* new_empty_gurl = new GURL;
461 if (InterlockedCompareExchangePointer(
462 reinterpret_cast<PVOID*>(&empty_gurl), new_empty_gurl, NULL)) {
463 // The old value was non-NULL, so no replacement was done. Another
464 // thread did the initialization out from under us.
465 delete new_empty_gurl;
468 return *empty_gurl;
471 #else
473 void EmptyGURLOnce(void) {
474 empty_gurl = new GURL;
477 const GURL& GURL::EmptyGURL() {
478 // Avoid static object construction/destruction on startup/shutdown.
479 pthread_once(&empty_gurl_once, EmptyGURLOnce);
480 return *empty_gurl;
483 #endif // WIN32
485 bool GURL::DomainIs(base::StringPiece lower_ascii_domain) const {
486 if (!is_valid_ || lower_ascii_domain.empty())
487 return false;
489 // FileSystem URLs have empty parsed_.host, so check this first.
490 if (SchemeIsFileSystem() && inner_url_)
491 return inner_url_->DomainIs(lower_ascii_domain);
493 if (!parsed_.host.is_nonempty())
494 return false;
496 // If the host name ends with a dot but the input domain doesn't,
497 // then we ignore the dot in the host name.
498 const char* host_last_pos = spec_.data() + parsed_.host.end() - 1;
499 int host_len = parsed_.host.len;
500 int domain_len = lower_ascii_domain.length();
501 if ('.' == *host_last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
502 host_last_pos--;
503 host_len--;
506 if (host_len < domain_len)
507 return false;
509 // |host_first_pos| is the start of the compared part of the host name, not
510 // start of the whole host name.
511 const char* host_first_pos = spec_.data() + parsed_.host.begin +
512 host_len - domain_len;
514 if (!base::LowerCaseEqualsASCII(
515 base::StringPiece(host_first_pos, domain_len), lower_ascii_domain))
516 return false;
518 // Make sure there aren't extra characters in host before the compared part;
519 // if the host name is longer than the input domain name, then the character
520 // immediately before the compared part should be a dot. For example,
521 // www.google.com has domain "google.com", but www.iamnotgoogle.com does not.
522 if ('.' != lower_ascii_domain[0] && host_len > domain_len &&
523 '.' != *(host_first_pos - 1))
524 return false;
526 return true;
529 void GURL::Swap(GURL* other) {
530 spec_.swap(other->spec_);
531 std::swap(is_valid_, other->is_valid_);
532 std::swap(parsed_, other->parsed_);
533 inner_url_.swap(other->inner_url_);
536 std::ostream& operator<<(std::ostream& out, const GURL& url) {
537 return out << url.possibly_invalid_spec();