Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / components / url_formatter / elide_url.cc
blob80e7866880d25f130c5051b9d6eacc1238a4ed59
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/url_formatter/elide_url.h"
7 #include "base/logging.h"
8 #include "base/strings/string_split.h"
9 #include "base/strings/utf_string_conversions.h"
10 #include "components/url_formatter/url_formatter.h"
11 #include "net/base/escape.h"
12 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
13 #include "url/gurl.h"
14 #include "url/url_constants.h"
16 #if !defined(OS_ANDROID)
17 #include "ui/gfx/text_elider.h" // nogncheck
18 #include "ui/gfx/text_utils.h" // nogncheck
19 #endif
21 namespace {
23 #if !defined(OS_ANDROID)
24 const base::char16 kDot = '.';
26 // Build a path from the first |num_components| elements in |path_elements|.
27 // Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate.
28 base::string16 BuildPathFromComponents(
29 const base::string16& path_prefix,
30 const std::vector<base::string16>& path_elements,
31 const base::string16& filename,
32 size_t num_components) {
33 // Add the initial elements of the path.
34 base::string16 path = path_prefix;
36 // Build path from first |num_components| elements.
37 for (size_t j = 0; j < num_components; ++j)
38 path += path_elements[j] + gfx::kForwardSlash;
40 // Add |filename|, ellipsis if necessary.
41 if (num_components != (path_elements.size() - 1))
42 path += base::string16(gfx::kEllipsisUTF16) + gfx::kForwardSlash;
43 path += filename;
45 return path;
48 // Takes a prefix (Domain, or Domain+subdomain) and a collection of path
49 // components and elides if possible. Returns a string containing the longest
50 // possible elided path, or an empty string if elision is not possible.
51 base::string16 ElideComponentizedPath(
52 const base::string16& url_path_prefix,
53 const std::vector<base::string16>& url_path_elements,
54 const base::string16& url_filename,
55 const base::string16& url_query,
56 const gfx::FontList& font_list,
57 float available_pixel_width) {
58 const size_t url_path_number_of_elements = url_path_elements.size();
60 CHECK(url_path_number_of_elements);
61 for (size_t i = url_path_number_of_elements - 1; i > 0; --i) {
62 base::string16 elided_path = BuildPathFromComponents(
63 url_path_prefix, url_path_elements, url_filename, i);
64 if (available_pixel_width >= gfx::GetStringWidthF(elided_path, font_list))
65 return gfx::ElideText(elided_path + url_query, font_list,
66 available_pixel_width, gfx::ELIDE_TAIL);
69 return base::string16();
72 // Splits the hostname in the |url| into sub-strings for the full hostname,
73 // the domain (TLD+1), and the subdomain (everything leading the domain).
74 void SplitHost(const GURL& url,
75 base::string16* url_host,
76 base::string16* url_domain,
77 base::string16* url_subdomain) {
78 // Get Host.
79 *url_host = base::UTF8ToUTF16(url.host());
81 // Get domain and registry information from the URL.
82 *url_domain =
83 base::UTF8ToUTF16(net::registry_controlled_domains::GetDomainAndRegistry(
84 url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES));
85 if (url_domain->empty())
86 *url_domain = *url_host;
88 // Add port if required.
89 if (!url.port().empty()) {
90 *url_host += base::UTF8ToUTF16(":" + url.port());
91 *url_domain += base::UTF8ToUTF16(":" + url.port());
94 // Get sub domain.
95 const size_t domain_start_index = url_host->find(*url_domain);
96 base::string16 kWwwPrefix = base::UTF8ToUTF16("www.");
97 if (domain_start_index != base::string16::npos)
98 *url_subdomain = url_host->substr(0, domain_start_index);
99 if ((*url_subdomain == kWwwPrefix || url_subdomain->empty() ||
100 url.SchemeIsFile())) {
101 url_subdomain->clear();
105 #endif // !defined(OS_ANDROID)
107 base::string16 FormatUrlForSecurityDisplayInternal(const GURL& url,
108 const std::string& languages,
109 bool omit_scheme) {
110 if (!url.is_valid() || url.is_empty() || !url.IsStandard())
111 return url_formatter::FormatUrl(url, languages);
113 const base::string16 colon(base::ASCIIToUTF16(":"));
114 const base::string16 scheme_separator(
115 base::ASCIIToUTF16(url::kStandardSchemeSeparator));
117 if (url.SchemeIsFile()) {
118 return base::ASCIIToUTF16(url::kFileScheme) + scheme_separator +
119 base::UTF8ToUTF16(url.path());
122 if (url.SchemeIsFileSystem()) {
123 const GURL* inner_url = url.inner_url();
124 if (inner_url->SchemeIsFile()) {
125 return base::ASCIIToUTF16(url::kFileSystemScheme) + colon +
126 FormatUrlForSecurityDisplayInternal(*inner_url, languages,
127 false /*omit_scheme*/) +
128 base::UTF8ToUTF16(url.path());
130 return base::ASCIIToUTF16(url::kFileSystemScheme) + colon +
131 FormatUrlForSecurityDisplayInternal(*inner_url, languages,
132 false /*omit_scheme*/);
135 const GURL origin = url.GetOrigin();
136 const std::string& scheme = origin.scheme();
137 const std::string& host = origin.host();
139 base::string16 result;
140 if (!omit_scheme || !url.SchemeIsHTTPOrHTTPS())
141 result = base::UTF8ToUTF16(scheme) + scheme_separator;
142 result += base::UTF8ToUTF16(host);
144 const int port = origin.IntPort();
145 const int default_port = url::DefaultPortForScheme(
146 scheme.c_str(), static_cast<int>(scheme.length()));
147 if (port != url::PORT_UNSPECIFIED && port != default_port)
148 result += colon + base::UTF8ToUTF16(origin.port());
150 return result;
153 } // namespace
155 namespace url_formatter {
157 #if !defined(OS_ANDROID)
159 // TODO(pkasting): http://crbug.com/77883 This whole function gets
160 // kerning/ligatures/etc. issues potentially wrong by assuming that the width of
161 // a rendered string is always the sum of the widths of its substrings. Also I
162 // suspect it could be made simpler.
163 base::string16 ElideUrl(const GURL& url,
164 const gfx::FontList& font_list,
165 float available_pixel_width,
166 const std::string& languages) {
167 // Get a formatted string and corresponding parsing of the url.
168 url::Parsed parsed;
169 const base::string16 url_string = url_formatter::FormatUrl(
170 url, languages, url_formatter::kFormatUrlOmitAll,
171 net::UnescapeRule::SPACES, &parsed, nullptr, nullptr);
172 if (available_pixel_width <= 0)
173 return url_string;
175 // If non-standard, return plain eliding.
176 if (!url.IsStandard())
177 return gfx::ElideText(url_string, font_list, available_pixel_width,
178 gfx::ELIDE_TAIL);
180 // Now start eliding url_string to fit within available pixel width.
181 // Fist pass - check to see whether entire url_string fits.
182 const float pixel_width_url_string =
183 gfx::GetStringWidthF(url_string, font_list);
184 if (available_pixel_width >= pixel_width_url_string)
185 return url_string;
187 // Get the path substring, including query and reference.
188 const size_t path_start_index = parsed.path.begin;
189 const size_t path_len = parsed.path.len;
190 base::string16 url_path_query_etc = url_string.substr(path_start_index);
191 base::string16 url_path = url_string.substr(path_start_index, path_len);
193 // Return general elided text if url minus the query fits.
194 const base::string16 url_minus_query =
195 url_string.substr(0, path_start_index + path_len);
196 if (available_pixel_width >= gfx::GetStringWidthF(url_minus_query, font_list))
197 return gfx::ElideText(url_string, font_list, available_pixel_width,
198 gfx::ELIDE_TAIL);
200 base::string16 url_host;
201 base::string16 url_domain;
202 base::string16 url_subdomain;
203 SplitHost(url, &url_host, &url_domain, &url_subdomain);
205 // If this is a file type, the path is now defined as everything after ":".
206 // For example, "C:/aa/aa/bb", the path is "/aa/bb/cc". Interesting, the
207 // domain is now C: - this is a nice hack for eliding to work pleasantly.
208 if (url.SchemeIsFile()) {
209 // Split the path string using ":"
210 const base::string16 kColon(1, ':');
211 std::vector<base::string16> file_path_split = base::SplitString(
212 url_path, kColon, base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
213 if (file_path_split.size() > 1) { // File is of type "file:///C:/.."
214 url_host.clear();
215 url_domain.clear();
216 url_subdomain.clear();
218 url_host = url_domain = file_path_split.at(0).substr(1) + kColon;
219 url_path_query_etc = url_path = file_path_split.at(1);
223 // Second Pass - remove scheme - the rest fits.
224 const float pixel_width_url_host = gfx::GetStringWidthF(url_host, font_list);
225 const float pixel_width_url_path =
226 gfx::GetStringWidthF(url_path_query_etc, font_list);
227 if (available_pixel_width >= pixel_width_url_host + pixel_width_url_path)
228 return url_host + url_path_query_etc;
230 // Third Pass: Subdomain, domain and entire path fits.
231 const float pixel_width_url_domain =
232 gfx::GetStringWidthF(url_domain, font_list);
233 const float pixel_width_url_subdomain =
234 gfx::GetStringWidthF(url_subdomain, font_list);
235 if (available_pixel_width >=
236 pixel_width_url_subdomain + pixel_width_url_domain + pixel_width_url_path)
237 return url_subdomain + url_domain + url_path_query_etc;
239 // Query element.
240 base::string16 url_query;
241 const float kPixelWidthDotsTrailer =
242 gfx::GetStringWidthF(base::string16(gfx::kEllipsisUTF16), font_list);
243 if (parsed.query.is_nonempty()) {
244 url_query = base::UTF8ToUTF16("?") + url_string.substr(parsed.query.begin);
245 if (available_pixel_width >=
246 (pixel_width_url_subdomain + pixel_width_url_domain +
247 pixel_width_url_path - gfx::GetStringWidthF(url_query, font_list))) {
248 return gfx::ElideText(url_subdomain + url_domain + url_path_query_etc,
249 font_list, available_pixel_width, gfx::ELIDE_TAIL);
253 // Parse url_path using '/'.
254 std::vector<base::string16> url_path_elements =
255 base::SplitString(url_path, base::string16(1, gfx::kForwardSlash),
256 base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
258 // Get filename - note that for a path ending with /
259 // such as www.google.com/intl/ads/, the file name is ads/.
260 base::string16 url_filename(
261 url_path_elements.empty() ? base::string16() : url_path_elements.back());
262 size_t url_path_number_of_elements = url_path_elements.size();
263 if (url_filename.empty() && (url_path_number_of_elements > 1)) {
264 // Path ends with a '/'.
265 --url_path_number_of_elements;
266 url_filename =
267 url_path_elements[url_path_number_of_elements - 1] + gfx::kForwardSlash;
270 const size_t kMaxNumberOfUrlPathElementsAllowed = 1024;
271 if (url_path_number_of_elements <= 1 ||
272 url_path_number_of_elements > kMaxNumberOfUrlPathElementsAllowed) {
273 // No path to elide, or too long of a path (could overflow in loop below)
274 // Just elide this as a text string.
275 return gfx::ElideText(url_subdomain + url_domain + url_path_query_etc,
276 font_list, available_pixel_width, gfx::ELIDE_TAIL);
279 // Start eliding the path and replacing elements by ".../".
280 const base::string16 kEllipsisAndSlash =
281 base::string16(gfx::kEllipsisUTF16) + gfx::kForwardSlash;
282 const float pixel_width_ellipsis_slash =
283 gfx::GetStringWidthF(kEllipsisAndSlash, font_list);
285 // Check with both subdomain and domain.
286 base::string16 elided_path = ElideComponentizedPath(
287 url_subdomain + url_domain, url_path_elements, url_filename, url_query,
288 font_list, available_pixel_width);
289 if (!elided_path.empty())
290 return elided_path;
292 // Check with only domain.
293 // If a subdomain is present, add an ellipsis before domain.
294 // This is added only if the subdomain pixel width is larger than
295 // the pixel width of kEllipsis. Otherwise, subdomain remains,
296 // which means that this case has been resolved earlier.
297 base::string16 url_elided_domain = url_subdomain + url_domain;
298 if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) {
299 if (!url_subdomain.empty())
300 url_elided_domain = kEllipsisAndSlash[0] + url_domain;
301 else
302 url_elided_domain = url_domain;
304 elided_path = ElideComponentizedPath(url_elided_domain, url_path_elements,
305 url_filename, url_query, font_list,
306 available_pixel_width);
308 if (!elided_path.empty())
309 return elided_path;
312 // Return elided domain/.../filename anyway.
313 base::string16 final_elided_url_string(url_elided_domain);
314 const float url_elided_domain_width =
315 gfx::GetStringWidthF(url_elided_domain, font_list);
317 // A hack to prevent trailing ".../...".
318 if ((available_pixel_width - url_elided_domain_width) >
319 pixel_width_ellipsis_slash + kPixelWidthDotsTrailer +
320 gfx::GetStringWidthF(base::ASCIIToUTF16("UV"), font_list)) {
321 final_elided_url_string += BuildPathFromComponents(
322 base::string16(), url_path_elements, url_filename, 1);
323 } else {
324 final_elided_url_string += url_path;
327 return gfx::ElideText(final_elided_url_string, font_list,
328 available_pixel_width, gfx::ELIDE_TAIL);
331 base::string16 ElideHost(const GURL& url,
332 const gfx::FontList& font_list,
333 float available_pixel_width) {
334 base::string16 url_host;
335 base::string16 url_domain;
336 base::string16 url_subdomain;
337 SplitHost(url, &url_host, &url_domain, &url_subdomain);
339 const float pixel_width_url_host = gfx::GetStringWidthF(url_host, font_list);
340 if (available_pixel_width >= pixel_width_url_host)
341 return url_host;
343 if (url_subdomain.empty())
344 return url_domain;
346 const float pixel_width_url_domain =
347 gfx::GetStringWidthF(url_domain, font_list);
348 float subdomain_width = available_pixel_width - pixel_width_url_domain;
349 if (subdomain_width <= 0)
350 return base::string16(gfx::kEllipsisUTF16) + kDot + url_domain;
352 const base::string16 elided_subdomain = gfx::ElideText(
353 url_subdomain, font_list, subdomain_width, gfx::ELIDE_HEAD);
354 return elided_subdomain + url_domain;
357 #endif // !defined(OS_ANDROID)
359 base::string16 FormatUrlForSecurityDisplay(const GURL& url,
360 const std::string& languages) {
361 return FormatUrlForSecurityDisplayInternal(url, languages, false);
364 base::string16 FormatUrlForSecurityDisplayOmitScheme(
365 const GURL& url,
366 const std::string& languages) {
367 return FormatUrlForSecurityDisplayInternal(url, languages, true);
370 } // namespace url_formatter