1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/url_formatter/elide_url.h"
7 #include "base/logging.h"
8 #include "base/strings/string_split.h"
9 #include "base/strings/utf_string_conversions.h"
10 #include "components/url_formatter/url_formatter.h"
11 #include "net/base/escape.h"
12 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
14 #include "url/url_constants.h"
16 #if !defined(OS_ANDROID)
17 #include "ui/gfx/text_elider.h" // nogncheck
18 #include "ui/gfx/text_utils.h" // nogncheck
23 #if !defined(OS_ANDROID)
// The '.' character as a base::char16. ElideHost() places it between the
// ellipsis and the domain when there is no width left for the subdomain.
24 const base::char16 kDot
= '.';
26 // Build a path from the first |num_components| elements in |path_elements|.
27 // Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate.
// Each included element is followed by gfx::kForwardSlash. |num_components|
// must not exceed path_elements.size(), because the elements are indexed
// directly with operator[].
28 base::string16
BuildPathFromComponents(
29 const base::string16
& path_prefix
,
30 const std::vector
<base::string16
>& path_elements
,
31 const base::string16
& filename
,
32 size_t num_components
) {
33 // Add the initial elements of the path.
34 base::string16 path
= path_prefix
;
36 // Build path from first |num_components| elements.
37 for (size_t j
= 0; j
< num_components
; ++j
)
38 path
+= path_elements
[j
] + gfx::kForwardSlash
;
40 // Add |filename|, ellipsis if necessary.
// The ellipsis-plus-slash marker is appended whenever |num_components| differs
// from path_elements.size() - 1, i.e. whenever the leading elements kept above
// are not exactly "everything except the last element".
// NOTE(review): size() - 1 is unsigned arithmetic; presumably |path_elements|
// is never empty here - confirm against the callers.
41 if (num_components
!= (path_elements
.size() - 1))
42 path
+= base::string16(gfx::kEllipsisUTF16
) + gfx::kForwardSlash
;
// NOTE(review): this listing stops here. The statements that append
// |filename| and return |path| (as the header comment describes) are not
// visible - confirm against the original file.
48 // Takes a prefix (Domain, or Domain+subdomain) and a collection of path
49 // components and elides if possible. Returns a string containing the longest
50 // possible elided path, or an empty string if elision is not possible.
// |url_query| is not part of the fit test: it is appended to the first
// candidate that fits and the combined string is then tail-elided to
// |available_pixel_width| using |font_list|.
51 base::string16
ElideComponentizedPath(
52 const base::string16
& url_path_prefix
,
53 const std::vector
<base::string16
>& url_path_elements
,
54 const base::string16
& url_filename
,
55 const base::string16
& url_query
,
56 const gfx::FontList
& font_list
,
57 float available_pixel_width
) {
58 const size_t url_path_number_of_elements
= url_path_elements
.size();
// At least one path element is required; the loop below starts at size - 1.
60 CHECK(url_path_number_of_elements
);
// Try candidates from the most leading components (size - 1) down to one.
// Zero leading components is never tried by this loop.
61 for (size_t i
= url_path_number_of_elements
- 1; i
> 0; --i
) {
62 base::string16 elided_path
= BuildPathFromComponents(
63 url_path_prefix
, url_path_elements
, url_filename
, i
);
// Only the path without the query has to fit within the available width.
64 if (available_pixel_width
>= gfx::GetStringWidthF(elided_path
, font_list
))
65 return gfx::ElideText(elided_path
+ url_query
, font_list
,
66 available_pixel_width
, gfx::ELIDE_TAIL
);
// No candidate fit: signal failure with an empty string.
69 return base::string16();
72 // Splits the hostname in the |url| into sub-strings for the full hostname,
73 // the domain (TLD+1), and the subdomain (everything leading the domain).
// All three results are written through the output pointers. |*url_subdomain|
// is only assigned when the domain is found inside the host (or cleared by the
// final check), so callers should pass in an empty string; the callers visible
// in this file pass default-constructed strings.
74 void SplitHost(const GURL
& url
,
75 base::string16
* url_host
,
76 base::string16
* url_domain
,
77 base::string16
* url_subdomain
) {
79 *url_host
= base::UTF8ToUTF16(url
.host());
81 // Get domain and registry information from the URL.
// NOTE(review): the left-hand side of the following expression is not visible
// in this listing. The url_domain->empty() check right after it suggests the
// result is assigned to |*url_domain| - confirm against the original file.
83 base::UTF8ToUTF16(net::registry_controlled_domains::GetDomainAndRegistry(
84 url
, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES
));
// Fall back to the whole host when no domain was produced.
85 if (url_domain
->empty())
86 *url_domain
= *url_host
;
88 // Add port if required.
// Host and domain both receive the same ":" + port suffix.
89 if (!url
.port().empty()) {
90 *url_host
+= base::UTF8ToUTF16(":" + url
.port());
91 *url_domain
+= base::UTF8ToUTF16(":" + url
.port());
// The subdomain is everything in the host before the position at which find()
// reports the domain.
// NOTE(review): find() presumably reports the first occurrence, so a host that
// repeats the domain text (e.g. "example.com.example.com") would yield a
// truncated subdomain - confirm whether that case matters.
95 const size_t domain_start_index
= url_host
->find(*url_domain
);
96 base::string16 kWwwPrefix
= base::UTF8ToUTF16("www.");
97 if (domain_start_index
!= base::string16::npos
)
98 *url_subdomain
= url_host
->substr(0, domain_start_index
);
// A bare "www." subdomain and any subdomain of a file URL are dropped. The
// empty() clause only makes clear() a no-op for an already empty string.
99 if ((*url_subdomain
== kWwwPrefix
|| url_subdomain
->empty() ||
100 url
.SchemeIsFile())) {
101 url_subdomain
->clear();
105 #endif // !defined(OS_ANDROID)
108 namespace url_formatter
{
110 #if !defined(OS_ANDROID)
112 // TODO(pkasting): http://crbug.com/77883 This whole function gets
113 // kerning/ligatures/etc. issues potentially wrong by assuming that the width of
114 // a rendered string is always the sum of the widths of its substrings. Also I
115 // suspect it could be made simpler.
//
// Formats |url| with FormatUrl() (|languages| is forwarded to it) and shortens
// the result so that it fits in |available_pixel_width| when measured with
// |font_list|. The passes below go from least to most aggressive: the whole
// string, host + path, subdomain + domain + path, tail-eliding into the query,
// replacing path elements with an ellipsis, and finally replacing the
// subdomain with an ellipsis as well.
//
// NOTE(review): this listing appears to be missing several lines
// (declarations, return statements, closing braces, an "else", and the last
// argument of some calls). The comments below describe only what is visible;
// confirm details against the original file.
116 base::string16
ElideUrl(const GURL
& url
,
117 const gfx::FontList
& font_list
,
118 float available_pixel_width
,
119 const std::string
& languages
) {
120 // Get a formatted string and corresponding parsing of the url.
// NOTE(review): the declaration of |parsed| is not visible in this listing.
122 const base::string16 url_string
= url_formatter::FormatUrl(
123 url
, languages
, url_formatter::kFormatUrlOmitAll
,
124 net::UnescapeRule::SPACES
, &parsed
, nullptr, nullptr);
// NOTE(review): the statement controlled by this check is not visible.
125 if (available_pixel_width
<= 0)
128 // If non-standard, return plain eliding.
129 if (!url
.IsStandard())
130 return gfx::ElideText(url_string
, font_list
, available_pixel_width
,
133 // Now start eliding url_string to fit within available pixel width.
134 // First pass - check to see whether entire url_string fits.
135 const float pixel_width_url_string
=
136 gfx::GetStringWidthF(url_string
, font_list
);
// NOTE(review): the statement controlled by this check is not visible.
137 if (available_pixel_width
>= pixel_width_url_string
)
140 // Get the path substring, including query and reference.
// |url_path_query_etc| runs from the start of the path to the end of the
// string; |url_path| is limited to the parsed path length.
141 const size_t path_start_index
= parsed
.path
.begin
;
142 const size_t path_len
= parsed
.path
.len
;
143 base::string16 url_path_query_etc
= url_string
.substr(path_start_index
);
144 base::string16 url_path
= url_string
.substr(path_start_index
, path_len
);
146 // Return general elided text if url minus the query fits.
147 const base::string16 url_minus_query
=
148 url_string
.substr(0, path_start_index
+ path_len
);
149 if (available_pixel_width
>= gfx::GetStringWidthF(url_minus_query
, font_list
))
150 return gfx::ElideText(url_string
, font_list
, available_pixel_width
,
153 base::string16 url_host
;
154 base::string16 url_domain
;
155 base::string16 url_subdomain
;
156 SplitHost(url
, &url_host
, &url_domain
, &url_subdomain
);
158 // If this is a file type, the path is now defined as everything after ":".
159 // For example, for "C:/aa/aa/bb" the path is "/aa/aa/bb". Interestingly, the
160 // domain is now C: - this is a nice hack for eliding to work pleasantly.
161 if (url
.SchemeIsFile()) {
162 // Split the path string using ":"
163 const base::string16
kColon(1, ':');
164 std::vector
<base::string16
> file_path_split
= base::SplitString(
165 url_path
, kColon
, base::TRIM_WHITESPACE
, base::SPLIT_WANT_ALL
);
166 if (file_path_split
.size() > 1) { // File is of type "file:///C:/.."
169 url_subdomain
.clear();
// The first piece minus its leading character, plus ":", stands in for both
// host and domain; the second piece becomes the path.
171 url_host
= url_domain
= file_path_split
.at(0).substr(1) + kColon
;
172 url_path_query_etc
= url_path
= file_path_split
.at(1);
176 // Second Pass - remove scheme - the rest fits.
// Despite its name, |pixel_width_url_path| measures |url_path_query_etc|,
// i.e. the path together with everything that follows it.
177 const float pixel_width_url_host
= gfx::GetStringWidthF(url_host
, font_list
);
178 const float pixel_width_url_path
=
179 gfx::GetStringWidthF(url_path_query_etc
, font_list
);
180 if (available_pixel_width
>= pixel_width_url_host
+ pixel_width_url_path
)
181 return url_host
+ url_path_query_etc
;
183 // Third Pass: Subdomain, domain and entire path fits.
184 const float pixel_width_url_domain
=
185 gfx::GetStringWidthF(url_domain
, font_list
);
186 const float pixel_width_url_subdomain
=
187 gfx::GetStringWidthF(url_subdomain
, font_list
);
188 if (available_pixel_width
>=
189 pixel_width_url_subdomain
+ pixel_width_url_domain
+ pixel_width_url_path
)
190 return url_subdomain
+ url_domain
+ url_path_query_etc
;
// Query pass: |url_query| is "?" followed by everything from the start of the
// parsed query to the end of the string. If the URL fits once the width of
// that part is subtracted, tail-elide the full subdomain + domain + path.
193 base::string16 url_query
;
194 const float kPixelWidthDotsTrailer
=
195 gfx::GetStringWidthF(base::string16(gfx::kEllipsisUTF16
), font_list
);
196 if (parsed
.query
.is_nonempty()) {
197 url_query
= base::UTF8ToUTF16("?") + url_string
.substr(parsed
.query
.begin
);
198 if (available_pixel_width
>=
199 (pixel_width_url_subdomain
+ pixel_width_url_domain
+
200 pixel_width_url_path
- gfx::GetStringWidthF(url_query
, font_list
))) {
201 return gfx::ElideText(url_subdomain
+ url_domain
+ url_path_query_etc
,
202 font_list
, available_pixel_width
, gfx::ELIDE_TAIL
);
206 // Parse url_path using '/'.
207 std::vector
<base::string16
> url_path_elements
=
208 base::SplitString(url_path
, base::string16(1, gfx::kForwardSlash
),
209 base::TRIM_WHITESPACE
, base::SPLIT_WANT_ALL
);
211 // Get filename - note that for a path ending with /
212 // such as www.google.com/intl/ads/, the file name is ads/.
213 base::string16
url_filename(
214 url_path_elements
.empty() ? base::string16() : url_path_elements
.back());
215 size_t url_path_number_of_elements
= url_path_elements
.size();
216 if (url_filename
.empty() && (url_path_number_of_elements
> 1)) {
217 // Path ends with a '/'.
218 --url_path_number_of_elements
;
// NOTE(review): the target of the following expression is not visible in this
// listing; per the comment above it is presumably assigned to |url_filename|.
220 url_path_elements
[url_path_number_of_elements
- 1] + gfx::kForwardSlash
;
223 const size_t kMaxNumberOfUrlPathElementsAllowed
= 1024;
224 if (url_path_number_of_elements
<= 1 ||
225 url_path_number_of_elements
> kMaxNumberOfUrlPathElementsAllowed
) {
226 // No path to elide, or too long of a path (could overflow in loop below)
227 // Just elide this as a text string.
228 return gfx::ElideText(url_subdomain
+ url_domain
+ url_path_query_etc
,
229 font_list
, available_pixel_width
, gfx::ELIDE_TAIL
);
232 // Start eliding the path and replacing elements by ".../".
233 const base::string16 kEllipsisAndSlash
=
234 base::string16(gfx::kEllipsisUTF16
) + gfx::kForwardSlash
;
235 const float pixel_width_ellipsis_slash
=
236 gfx::GetStringWidthF(kEllipsisAndSlash
, font_list
);
238 // Check with both subdomain and domain.
239 base::string16 elided_path
= ElideComponentizedPath(
240 url_subdomain
+ url_domain
, url_path_elements
, url_filename
, url_query
,
241 font_list
, available_pixel_width
);
// NOTE(review): the statement controlled by this check is not visible.
242 if (!elided_path
.empty())
245 // Check with only domain.
246 // If a subdomain is present, add an ellipsis before domain.
247 // This is added only if the subdomain pixel width is larger than
248 // the pixel width of kEllipsis. Otherwise, subdomain remains,
249 // which means that this case has been resolved earlier.
250 base::string16 url_elided_domain
= url_subdomain
+ url_domain
;
251 if (pixel_width_url_subdomain
> kPixelWidthDotsTrailer
) {
// kEllipsisAndSlash[0] is the first character of the ellipsis-plus-slash
// string built above.
// NOTE(review): two assignments to |url_elided_domain| follow back to back;
// an "else" between them is presumably missing from this listing.
252 if (!url_subdomain
.empty())
253 url_elided_domain
= kEllipsisAndSlash
[0] + url_domain
;
255 url_elided_domain
= url_domain
;
257 elided_path
= ElideComponentizedPath(url_elided_domain
, url_path_elements
,
258 url_filename
, url_query
, font_list
,
259 available_pixel_width
);
// NOTE(review): the statement controlled by this check is not visible.
261 if (!elided_path
.empty())
265 // Return elided domain/.../filename anyway.
266 base::string16
final_elided_url_string(url_elided_domain
);
267 const float url_elided_domain_width
=
268 gfx::GetStringWidthF(url_elided_domain
, font_list
);
270 // A hack to prevent trailing ".../...".
// The componentized path is only used when the width left after the domain
// exceeds the ellipsis-plus-slash, one more ellipsis, and the sample text "UV".
// NOTE(review): the two appends below are presumably the two arms of an
// if/else whose "else" is missing from this listing.
271 if ((available_pixel_width
- url_elided_domain_width
) >
272 pixel_width_ellipsis_slash
+ kPixelWidthDotsTrailer
+
273 gfx::GetStringWidthF(base::ASCIIToUTF16("UV"), font_list
)) {
274 final_elided_url_string
+= BuildPathFromComponents(
275 base::string16(), url_path_elements
, url_filename
, 1);
277 final_elided_url_string
+= url_path
;
280 return gfx::ElideText(final_elided_url_string
, font_list
,
281 available_pixel_width
, gfx::ELIDE_TAIL
);
// Shortens only the host of |url| so that it fits in |available_pixel_width|
// when measured with |font_list|. The domain is kept whole; the subdomain is
// elided from its head (gfx::ELIDE_HEAD) into the width the domain leaves over.
284 base::string16
ElideHost(const GURL
& url
,
285 const gfx::FontList
& font_list
,
286 float available_pixel_width
) {
287 base::string16 url_host
;
288 base::string16 url_domain
;
289 base::string16 url_subdomain
;
290 SplitHost(url
, &url_host
, &url_domain
, &url_subdomain
);
292 const float pixel_width_url_host
= gfx::GetStringWidthF(url_host
, font_list
);
// NOTE(review): the statements controlled by the next two checks are not
// visible in this listing - confirm what is returned when the whole host
// fits and when there is no subdomain.
293 if (available_pixel_width
>= pixel_width_url_host
)
296 if (url_subdomain
.empty())
299 const float pixel_width_url_domain
=
300 gfx::GetStringWidthF(url_domain
, font_list
);
// Width left over for the subdomain once the full domain is shown.
301 float subdomain_width
= available_pixel_width
- pixel_width_url_domain
;
// No width left: return ellipsis + kDot + domain. This result is returned
// as is, without being measured against |available_pixel_width|.
302 if (subdomain_width
<= 0)
303 return base::string16(gfx::kEllipsisUTF16
) + kDot
+ url_domain
;
305 const base::string16 elided_subdomain
= gfx::ElideText(
306 url_subdomain
, font_list
, subdomain_width
, gfx::ELIDE_HEAD
);
307 return elided_subdomain
+ url_domain
;
310 #endif // !defined(OS_ANDROID)
// Builds a display string for |url|. |languages| is forwarded to FormatUrl()
// and to the recursive calls made for filesystem URLs.
//  - Invalid, empty or non-standard URLs: FormatUrl(url, languages).
//  - File URLs: file scheme + standard scheme separator + path.
//  - Filesystem URLs: filesystem scheme + ":" + the formatted inner URL, plus
//    the outer URL's path when the inner URL is a file URL.
//  - Everything else: the origin's scheme, the separator and the host, with
//    the port appended only when it is specified and is not the scheme default.
312 base::string16
FormatUrlForSecurityDisplay(const GURL
& url
,
313 const std::string
& languages
) {
314 if (!url
.is_valid() || url
.is_empty() || !url
.IsStandard())
315 return url_formatter::FormatUrl(url
, languages
);
317 const base::string16
colon(base::ASCIIToUTF16(":"));
318 const base::string16
scheme_separator(
319 base::ASCIIToUTF16(url::kStandardSchemeSeparator
));
// File URLs: only the path is shown after the scheme; the host is not used.
321 if (url
.SchemeIsFile()) {
322 return base::ASCIIToUTF16(url::kFileScheme
) + scheme_separator
+
323 base::UTF8ToUTF16(url
.path());
326 if (url
.SchemeIsFileSystem()) {
// NOTE(review): |inner_url| is dereferenced without a null check; presumably a
// valid filesystem URL always has an inner URL - confirm.
327 const GURL
* inner_url
= url
.inner_url();
328 if (inner_url
->SchemeIsFile()) {
329 return base::ASCIIToUTF16(url::kFileSystemScheme
) + colon
+
330 FormatUrlForSecurityDisplay(*inner_url
, languages
) +
331 base::UTF8ToUTF16(url
.path());
333 return base::ASCIIToUTF16(url::kFileSystemScheme
) + colon
+
334 FormatUrlForSecurityDisplay(*inner_url
, languages
);
// General case: work from the origin so that only scheme, host and port
// contribute to the result.
337 const GURL origin
= url
.GetOrigin();
338 const std::string
& scheme
= origin
.scheme();
339 const std::string
& host
= origin
.host();
341 base::string16 result
= base::UTF8ToUTF16(scheme
);
342 result
+= scheme_separator
;
343 result
+= base::UTF8ToUTF16(host
);
// Append the port only when one is specified and it differs from the default
// port for this scheme.
345 const int port
= origin
.IntPort();
346 const int default_port
= url::DefaultPortForScheme(
347 scheme
.c_str(), static_cast<int>(scheme
.length()));
348 if (port
!= url::PORT_UNSPECIFIED
&& port
!= default_port
)
349 result
+= colon
+ base::UTF8ToUTF16(origin
.port());
// NOTE(review): the statement returning |result| is not visible in this
// listing - confirm against the original file.
353 } // namespace url_formatter