Don't preload rarely seen large images
[chromium-blink-merge.git] / url / gurl.cc
blob52aad734966e7f3ade3a270f33d5cd4d23d58025
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifdef WIN32
6 #include <windows.h>
7 #else
8 #include <pthread.h>
9 #endif
11 #include <algorithm>
12 #include <ostream>
14 #include "url/gurl.h"
16 #include "base/logging.h"
17 #include "base/strings/string_util.h"
18 #include "url/url_canon_stdstring.h"
19 #include "url/url_util.h"
21 namespace {
23 static std::string* empty_string = NULL;
24 static GURL* empty_gurl = NULL;
26 #ifdef WIN32
28 // Returns a static reference to an empty string for returning a reference
29 // when there is no underlying string.
30 const std::string& EmptyStringForGURL() {
31 // Avoid static object construction/destruction on startup/shutdown.
32 if (!empty_string) {
33 // Create the string. Be careful that we don't break in the case that this
34 // is being called from multiple threads. Statics are not threadsafe.
35 std::string* new_empty_string = new std::string;
36 if (InterlockedCompareExchangePointer(
37 reinterpret_cast<PVOID*>(&empty_string), new_empty_string, NULL)) {
38 // The old value was non-NULL, so no replacement was done. Another
39 // thread did the initialization out from under us.
40 delete new_empty_string;
43 return *empty_string;
46 #else
// pthread_once() control blocks guarding the one-time allocation of
// |empty_string| and |empty_gurl| respectively.
static pthread_once_t empty_string_once = PTHREAD_ONCE_INIT;
static pthread_once_t empty_gurl_once = PTHREAD_ONCE_INIT;
51 void EmptyStringForGURLOnce(void) {
52 empty_string = new std::string;
55 const std::string& EmptyStringForGURL() {
56 // Avoid static object construction/destruction on startup/shutdown.
57 pthread_once(&empty_string_once, EmptyStringForGURLOnce);
58 return *empty_string;
61 #endif // WIN32
63 } // namespace
65 GURL::GURL() : is_valid_(false) {
68 GURL::GURL(const GURL& other)
69 : spec_(other.spec_),
70 is_valid_(other.is_valid_),
71 parsed_(other.parsed_) {
72 if (other.inner_url_)
73 inner_url_.reset(new GURL(*other.inner_url_));
74 // Valid filesystem urls should always have an inner_url_.
75 DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_);
78 GURL::GURL(const std::string& url_string) {
79 InitCanonical(url_string, true);
82 GURL::GURL(const base::string16& url_string) {
83 InitCanonical(url_string, true);
86 GURL::GURL(const std::string& url_string, RetainWhiteSpaceSelector) {
87 InitCanonical(url_string, false);
90 GURL::GURL(const char* canonical_spec,
91 size_t canonical_spec_len,
92 const url::Parsed& parsed,
93 bool is_valid)
94 : spec_(canonical_spec, canonical_spec_len),
95 is_valid_(is_valid),
96 parsed_(parsed) {
97 InitializeFromCanonicalSpec();
100 GURL::GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid)
101 : is_valid_(is_valid),
102 parsed_(parsed) {
103 spec_.swap(canonical_spec);
104 InitializeFromCanonicalSpec();
107 template<typename STR>
108 void GURL::InitCanonical(const STR& input_spec, bool trim_path_end) {
109 // Reserve enough room in the output for the input, plus some extra so that
110 // we have room if we have to escape a few things without reallocating.
111 spec_.reserve(input_spec.size() + 32);
112 url::StdStringCanonOutput output(&spec_);
113 is_valid_ = url::Canonicalize(
114 input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end,
115 NULL, &output, &parsed_);
117 output.Complete(); // Must be done before using string.
118 if (is_valid_ && SchemeIsFileSystem()) {
119 inner_url_.reset(new GURL(spec_.data(), parsed_.Length(),
120 *parsed_.inner_parsed(), true));
124 void GURL::InitializeFromCanonicalSpec() {
125 if (is_valid_ && SchemeIsFileSystem()) {
126 inner_url_.reset(
127 new GURL(spec_.data(), parsed_.Length(),
128 *parsed_.inner_parsed(), true));
131 #ifndef NDEBUG
132 // For testing purposes, check that the parsed canonical URL is identical to
133 // what we would have produced. Skip checking for invalid URLs have no meaning
134 // and we can't always canonicalize then reproducabely.
135 if (is_valid_) {
136 url::Component scheme;
137 // We can't do this check on the inner_url of a filesystem URL, as
138 // canonical_spec actually points to the start of the outer URL, so we'd
139 // end up with infinite recursion in this constructor.
140 if (!url::FindAndCompareScheme(spec_.data(), spec_.length(),
141 url::kFileSystemScheme, &scheme) ||
142 scheme.begin == parsed_.scheme.begin) {
143 // We need to retain trailing whitespace on path URLs, as the |parsed_|
144 // spec we originally received may legitimately contain trailing white-
145 // space on the path or components e.g. if the #ref has been
146 // removed from a "foo:hello #ref" URL (see http://crbug.com/291747).
147 GURL test_url(spec_, RETAIN_TRAILING_PATH_WHITEPACE);
149 DCHECK(test_url.is_valid_ == is_valid_);
150 DCHECK(test_url.spec_ == spec_);
152 DCHECK(test_url.parsed_.scheme == parsed_.scheme);
153 DCHECK(test_url.parsed_.username == parsed_.username);
154 DCHECK(test_url.parsed_.password == parsed_.password);
155 DCHECK(test_url.parsed_.host == parsed_.host);
156 DCHECK(test_url.parsed_.port == parsed_.port);
157 DCHECK(test_url.parsed_.path == parsed_.path);
158 DCHECK(test_url.parsed_.query == parsed_.query);
159 DCHECK(test_url.parsed_.ref == parsed_.ref);
162 #endif
165 GURL::~GURL() {
168 GURL& GURL::operator=(GURL other) {
169 Swap(&other);
170 return *this;
173 const std::string& GURL::spec() const {
174 if (is_valid_ || spec_.empty())
175 return spec_;
177 DCHECK(false) << "Trying to get the spec of an invalid URL!";
178 return EmptyStringForGURL();
181 bool GURL::operator==(const GURL& other) const {
182 return spec_ == other.spec_;
185 bool GURL::operator!=(const GURL& other) const {
186 return spec_ != other.spec_;
189 bool GURL::operator<(const GURL& other) const {
190 return spec_ < other.spec_;
193 bool GURL::operator>(const GURL& other) const {
194 return spec_ > other.spec_;
197 GURL GURL::Resolve(const std::string& relative) const {
198 return ResolveWithCharsetConverter(relative, NULL);
200 GURL GURL::Resolve(const base::string16& relative) const {
201 return ResolveWithCharsetConverter(relative, NULL);
204 // Note: code duplicated below (it's inconvenient to use a template here).
205 GURL GURL::ResolveWithCharsetConverter(
206 const std::string& relative,
207 url::CharsetConverter* charset_converter) const {
208 // Not allowed for invalid URLs.
209 if (!is_valid_)
210 return GURL();
212 GURL result;
214 // Reserve enough room in the output for the input, plus some extra so that
215 // we have room if we have to escape a few things without reallocating.
216 result.spec_.reserve(spec_.size() + 32);
217 url::StdStringCanonOutput output(&result.spec_);
219 if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
220 parsed_, relative.data(),
221 static_cast<int>(relative.length()),
222 charset_converter, &output, &result.parsed_)) {
223 // Error resolving, return an empty URL.
224 return GURL();
227 output.Complete();
228 result.is_valid_ = true;
229 if (result.SchemeIsFileSystem()) {
230 result.inner_url_.reset(
231 new GURL(result.spec_.data(), result.parsed_.Length(),
232 *result.parsed_.inner_parsed(), true));
234 return result;
237 // Note: code duplicated above (it's inconvenient to use a template here).
238 GURL GURL::ResolveWithCharsetConverter(
239 const base::string16& relative,
240 url::CharsetConverter* charset_converter) const {
241 // Not allowed for invalid URLs.
242 if (!is_valid_)
243 return GURL();
245 GURL result;
247 // Reserve enough room in the output for the input, plus some extra so that
248 // we have room if we have to escape a few things without reallocating.
249 result.spec_.reserve(spec_.size() + 32);
250 url::StdStringCanonOutput output(&result.spec_);
252 if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
253 parsed_, relative.data(),
254 static_cast<int>(relative.length()),
255 charset_converter, &output, &result.parsed_)) {
256 // Error resolving, return an empty URL.
257 return GURL();
260 output.Complete();
261 result.is_valid_ = true;
262 if (result.SchemeIsFileSystem()) {
263 result.inner_url_.reset(
264 new GURL(result.spec_.data(), result.parsed_.Length(),
265 *result.parsed_.inner_parsed(), true));
267 return result;
270 // Note: code duplicated below (it's inconvenient to use a template here).
271 GURL GURL::ReplaceComponents(
272 const url::Replacements<char>& replacements) const {
273 GURL result;
275 // Not allowed for invalid URLs.
276 if (!is_valid_)
277 return GURL();
279 // Reserve enough room in the output for the input, plus some extra so that
280 // we have room if we have to escape a few things without reallocating.
281 result.spec_.reserve(spec_.size() + 32);
282 url::StdStringCanonOutput output(&result.spec_);
284 result.is_valid_ = url::ReplaceComponents(
285 spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
286 NULL, &output, &result.parsed_);
288 output.Complete();
289 if (result.is_valid_ && result.SchemeIsFileSystem()) {
290 result.inner_url_.reset(new GURL(spec_.data(), result.parsed_.Length(),
291 *result.parsed_.inner_parsed(), true));
293 return result;
296 // Note: code duplicated above (it's inconvenient to use a template here).
297 GURL GURL::ReplaceComponents(
298 const url::Replacements<base::char16>& replacements) const {
299 GURL result;
301 // Not allowed for invalid URLs.
302 if (!is_valid_)
303 return GURL();
305 // Reserve enough room in the output for the input, plus some extra so that
306 // we have room if we have to escape a few things without reallocating.
307 result.spec_.reserve(spec_.size() + 32);
308 url::StdStringCanonOutput output(&result.spec_);
310 result.is_valid_ = url::ReplaceComponents(
311 spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
312 NULL, &output, &result.parsed_);
314 output.Complete();
315 if (result.is_valid_ && result.SchemeIsFileSystem()) {
316 result.inner_url_.reset(new GURL(spec_.data(), result.parsed_.Length(),
317 *result.parsed_.inner_parsed(), true));
319 return result;
322 GURL GURL::GetOrigin() const {
323 // This doesn't make sense for invalid or nonstandard URLs, so return
324 // the empty URL
325 if (!is_valid_ || !IsStandard())
326 return GURL();
328 if (SchemeIsFileSystem())
329 return inner_url_->GetOrigin();
331 url::Replacements<char> replacements;
332 replacements.ClearUsername();
333 replacements.ClearPassword();
334 replacements.ClearPath();
335 replacements.ClearQuery();
336 replacements.ClearRef();
338 return ReplaceComponents(replacements);
341 GURL GURL::GetAsReferrer() const {
342 if (!is_valid_ || !SchemeIsHTTPOrHTTPS())
343 return GURL();
345 if (!has_ref() && !has_username() && !has_password())
346 return GURL(*this);
348 url::Replacements<char> replacements;
349 replacements.ClearRef();
350 replacements.ClearUsername();
351 replacements.ClearPassword();
352 return ReplaceComponents(replacements);
355 GURL GURL::GetWithEmptyPath() const {
356 // This doesn't make sense for invalid or nonstandard URLs, so return
357 // the empty URL.
358 if (!is_valid_ || !IsStandard())
359 return GURL();
361 // We could optimize this since we know that the URL is canonical, and we are
362 // appending a canonical path, so avoiding re-parsing.
363 GURL other(*this);
364 if (parsed_.path.len == 0)
365 return other;
367 // Clear everything after the path.
368 other.parsed_.query.reset();
369 other.parsed_.ref.reset();
371 // Set the path, since the path is longer than one, we can just set the
372 // first character and resize.
373 other.spec_[other.parsed_.path.begin] = '/';
374 other.parsed_.path.len = 1;
375 other.spec_.resize(other.parsed_.path.begin + 1);
376 return other;
379 bool GURL::IsStandard() const {
380 return url::IsStandard(spec_.data(), parsed_.scheme);
383 bool GURL::SchemeIs(const char* lower_ascii_scheme) const {
384 if (parsed_.scheme.len <= 0)
385 return lower_ascii_scheme == NULL;
386 return base::LowerCaseEqualsASCII(spec_.data() + parsed_.scheme.begin,
387 spec_.data() + parsed_.scheme.end(),
388 lower_ascii_scheme);
391 bool GURL::SchemeIsHTTPOrHTTPS() const {
392 return SchemeIs(url::kHttpScheme) || SchemeIs(url::kHttpsScheme);
395 bool GURL::SchemeIsWSOrWSS() const {
396 return SchemeIs(url::kWsScheme) || SchemeIs(url::kWssScheme);
399 int GURL::IntPort() const {
400 if (parsed_.port.is_nonempty())
401 return url::ParsePort(spec_.data(), parsed_.port);
402 return url::PORT_UNSPECIFIED;
405 int GURL::EffectiveIntPort() const {
406 int int_port = IntPort();
407 if (int_port == url::PORT_UNSPECIFIED && IsStandard())
408 return url::DefaultPortForScheme(spec_.data() + parsed_.scheme.begin,
409 parsed_.scheme.len);
410 return int_port;
413 std::string GURL::ExtractFileName() const {
414 url::Component file_component;
415 url::ExtractFileName(spec_.data(), parsed_.path, &file_component);
416 return ComponentString(file_component);
419 std::string GURL::PathForRequest() const {
420 DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty";
421 if (parsed_.ref.len >= 0) {
422 // Clip off the reference when it exists. The reference starts after the #
423 // sign, so we have to subtract one to also remove it.
424 return std::string(spec_, parsed_.path.begin,
425 parsed_.ref.begin - parsed_.path.begin - 1);
427 // Compute the actual path length, rather than depending on the spec's
428 // terminator. If we're an inner_url, our spec continues on into our outer
429 // url's path/query/ref.
430 int path_len = parsed_.path.len;
431 if (parsed_.query.is_valid())
432 path_len = parsed_.query.end() - parsed_.path.begin;
434 return std::string(spec_, parsed_.path.begin, path_len);
437 std::string GURL::HostNoBrackets() const {
438 // If host looks like an IPv6 literal, strip the square brackets.
439 url::Component h(parsed_.host);
440 if (h.len >= 2 && spec_[h.begin] == '[' && spec_[h.end() - 1] == ']') {
441 h.begin++;
442 h.len -= 2;
444 return ComponentString(h);
447 std::string GURL::GetContent() const {
448 return is_valid_ ? ComponentString(parsed_.GetContent()) : std::string();
451 bool GURL::HostIsIPAddress() const {
452 if (!is_valid_ || spec_.empty())
453 return false;
455 url::RawCanonOutputT<char, 128> ignored_output;
456 url::CanonHostInfo host_info;
457 url::CanonicalizeIPAddress(spec_.c_str(), parsed_.host, &ignored_output,
458 &host_info);
459 return host_info.IsIPAddress();
462 #ifdef WIN32
464 const GURL& GURL::EmptyGURL() {
465 // Avoid static object construction/destruction on startup/shutdown.
466 if (!empty_gurl) {
467 // Create the string. Be careful that we don't break in the case that this
468 // is being called from multiple threads.
469 GURL* new_empty_gurl = new GURL;
470 if (InterlockedCompareExchangePointer(
471 reinterpret_cast<PVOID*>(&empty_gurl), new_empty_gurl, NULL)) {
472 // The old value was non-NULL, so no replacement was done. Another
473 // thread did the initialization out from under us.
474 delete new_empty_gurl;
477 return *empty_gurl;
480 #else
482 void EmptyGURLOnce(void) {
483 empty_gurl = new GURL;
486 const GURL& GURL::EmptyGURL() {
487 // Avoid static object construction/destruction on startup/shutdown.
488 pthread_once(&empty_gurl_once, EmptyGURLOnce);
489 return *empty_gurl;
492 #endif // WIN32
494 bool GURL::DomainIs(const char* lower_ascii_domain,
495 int domain_len) const {
496 // Return false if this URL is not valid or domain is empty.
497 if (!is_valid_ || !domain_len)
498 return false;
500 // FileSystem URLs have empty parsed_.host, so check this first.
501 if (SchemeIsFileSystem() && inner_url_)
502 return inner_url_->DomainIs(lower_ascii_domain, domain_len);
504 if (!parsed_.host.is_nonempty())
505 return false;
507 // Check whether the host name is end with a dot. If yes, treat it
508 // the same as no-dot unless the input comparison domain is end
509 // with dot.
510 const char* last_pos = spec_.data() + parsed_.host.end() - 1;
511 int host_len = parsed_.host.len;
512 if ('.' == *last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
513 last_pos--;
514 host_len--;
517 // Return false if host's length is less than domain's length.
518 if (host_len < domain_len)
519 return false;
521 // Compare this url whether belong specific domain.
522 const char* start_pos = spec_.data() + parsed_.host.begin +
523 host_len - domain_len;
525 if (!base::LowerCaseEqualsASCII(start_pos,
526 last_pos + 1,
527 lower_ascii_domain,
528 lower_ascii_domain + domain_len))
529 return false;
531 // Check whether host has right domain start with dot, make sure we got
532 // right domain range. For example www.google.com has domain
533 // "google.com" but www.iamnotgoogle.com does not.
534 if ('.' != lower_ascii_domain[0] && host_len > domain_len &&
535 '.' != *(start_pos - 1))
536 return false;
538 return true;
541 void GURL::Swap(GURL* other) {
542 spec_.swap(other->spec_);
543 std::swap(is_valid_, other->is_valid_);
544 std::swap(parsed_, other->parsed_);
545 inner_url_.swap(other->inner_url_);
548 std::ostream& operator<<(std::ostream& out, const GURL& url) {
549 return out << url.possibly_invalid_spec();