1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/google/core/browser/google_util.h"
10 #include "base/command_line.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_split.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "components/google/core/browser/google_switches.h"
17 #include "components/google/core/browser/google_url_tracker.h"
18 #include "components/url_fixer/url_fixer.h"
19 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
20 #include "net/base/url_util.h"
// Only use Link Doctor on official builds.  It uses an API key, too, but
// seems best to just disable it, for more responsive error pages and to reduce
// server load.
#if defined(GOOGLE_CHROME_BUILD)
#define LINKDOCTOR_SERVER_REQUEST_URL "https://www.googleapis.com/rpc"
#else
// Builds without GOOGLE_CHROME_BUILD get an empty request URL.
#define LINKDOCTOR_SERVER_REQUEST_URL ""
#endif
33 // Helpers --------------------------------------------------------------------
// When true, LinkDoctorBaseURL() returns a fixed mock URL instead of
// LINKDOCTOR_SERVER_REQUEST_URL. Only ever set (never cleared) by
// SetMockLinkDoctorBaseURLForTesting().
bool gUseMockLinkDoctorBaseURLForTesting = false;
// True if |path| is exactly one of the base home page paths, "/" or "/webhp".
// The comparison is case-sensitive and does not tolerate trailing characters.
bool IsPathHomePageBase(const std::string& path) {
  return (path == "/") || (path == "/webhp");
}
43 // True if |host| is "[www.]<domain_in_lower_case>.<TLD>" with a valid TLD. If
44 // |subdomain_permission| is ALLOW_SUBDOMAIN, we check against host
45 // "*.<domain_in_lower_case>.<TLD>" instead.
46 bool IsValidHostName(const std::string
& host
,
47 const std::string
& domain_in_lower_case
,
48 google_util::SubdomainPermission subdomain_permission
) {
49 size_t tld_length
= net::registry_controlled_domains::GetRegistryLength(
51 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES
,
52 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES
);
53 if ((tld_length
== 0) || (tld_length
== std::string::npos
))
55 // Removes the tld and the preceding dot.
56 std::string
host_minus_tld(host
, 0, host
.length() - tld_length
- 1);
57 if (LowerCaseEqualsASCII(host_minus_tld
, domain_in_lower_case
.c_str()))
59 if (subdomain_permission
== google_util::ALLOW_SUBDOMAIN
)
60 return EndsWith(host_minus_tld
, "." + domain_in_lower_case
, false);
61 return LowerCaseEqualsASCII(host_minus_tld
,
62 ("www." + domain_in_lower_case
).c_str());
65 // True if |url| is a valid URL with HTTP or HTTPS scheme. If |port_permission|
66 // is DISALLOW_NON_STANDARD_PORTS, this also requires |url| to use the standard
67 // port for its scheme (80 for HTTP, 443 for HTTPS).
68 bool IsValidURL(const GURL
& url
, google_util::PortPermission port_permission
) {
69 return url
.is_valid() && url
.SchemeIsHTTPOrHTTPS() &&
70 (url
.port().empty() ||
71 (port_permission
== google_util::ALLOW_NON_STANDARD_PORTS
));
77 namespace google_util
{
79 // Global functions -----------------------------------------------------------
81 bool HasGoogleSearchQueryParam(const std::string
& str
) {
82 url::Component
query(0, str
.length()), key
, value
;
83 while (url::ExtractQueryKeyValue(str
.c_str(), &query
, &key
, &value
)) {
84 if ((key
.len
== 1) && (str
[key
.begin
] == 'q') && value
.is_nonempty())
90 GURL
LinkDoctorBaseURL() {
91 if (gUseMockLinkDoctorBaseURLForTesting
)
92 return GURL("http://mock.linkdoctor.url/for?testing");
93 return GURL(LINKDOCTOR_SERVER_REQUEST_URL
);
96 void SetMockLinkDoctorBaseURLForTesting() {
97 gUseMockLinkDoctorBaseURLForTesting
= true;
// Maps |application_locale| to the locale string Google expects. Every value
// is returned unchanged except "nb", which becomes "no".
std::string GetGoogleLocale(const std::string& application_locale) {
  // Google does not recognize "nb" for Norwegian Bokmal; it uses "no".
  return (application_locale == "nb") ? "no" : application_locale;
}
105 GURL
AppendGoogleLocaleParam(const GURL
& url
,
106 const std::string
& application_locale
) {
107 return net::AppendQueryParameter(
108 url
, "hl", GetGoogleLocale(application_locale
));
111 std::string
GetGoogleCountryCode(GURL google_homepage_url
) {
112 const std::string google_hostname
= google_homepage_url
.host();
113 const size_t last_dot
= google_hostname
.find_last_of('.');
114 if (last_dot
== std::string::npos
) {
117 std::string country_code
= google_hostname
.substr(last_dot
+ 1);
118 // Assume the com TLD implies the US.
119 if (country_code
== "com")
121 // Google uses the Unicode Common Locale Data Repository (CLDR), and the CLDR
122 // code for the UK is "gb".
123 if (country_code
== "uk")
125 // Catalonia does not have a CLDR country code, since it's a region in Spain,
126 // so use Spain instead.
127 if (country_code
== "cat")
132 GURL
GetGoogleSearchURL(GURL google_homepage_url
) {
133 // To transform the homepage URL into the corresponding search URL, add the
134 // "search" and the "q=" query string.
135 std::string search_path
= "search";
136 std::string query_string
= "q=";
137 GURL::Replacements replacements
;
138 replacements
.SetPathStr(search_path
);
139 replacements
.SetQueryStr(query_string
);
140 return google_homepage_url
.ReplaceComponents(replacements
);
143 GURL
CommandLineGoogleBaseURL() {
144 // Unit tests may add command-line flags after the first call to this
145 // function, so we don't simply initialize a static |base_url| directly and
146 // then unconditionally return it.
147 CR_DEFINE_STATIC_LOCAL(std::string
, switch_value
, ());
148 CR_DEFINE_STATIC_LOCAL(GURL
, base_url
, ());
149 std::string
current_switch_value(
150 CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
151 switches::kGoogleBaseURL
));
152 if (current_switch_value
!= switch_value
) {
153 switch_value
= current_switch_value
;
154 base_url
= url_fixer::FixupURL(switch_value
, std::string());
155 if (!base_url
.is_valid() || base_url
.has_query() || base_url
.has_ref())
161 bool StartsWithCommandLineGoogleBaseURL(const GURL
& url
) {
162 GURL
base_url(CommandLineGoogleBaseURL());
163 return base_url
.is_valid() &&
164 StartsWithASCII(url
.possibly_invalid_spec(), base_url
.spec(), true);
167 bool IsGoogleHostname(const std::string
& host
,
168 SubdomainPermission subdomain_permission
) {
169 GURL
base_url(CommandLineGoogleBaseURL());
170 if (base_url
.is_valid() && (host
== base_url
.host()))
173 return IsValidHostName(host
, "google", subdomain_permission
);
176 bool IsGoogleDomainUrl(const GURL
& url
,
177 SubdomainPermission subdomain_permission
,
178 PortPermission port_permission
) {
179 return IsValidURL(url
, port_permission
) &&
180 IsGoogleHostname(url
.host(), subdomain_permission
);
183 bool IsGoogleHomePageUrl(const GURL
& url
) {
184 // First check to see if this has a Google domain.
185 if (!IsGoogleDomainUrl(url
, DISALLOW_SUBDOMAIN
, DISALLOW_NON_STANDARD_PORTS
))
188 // Make sure the path is a known home page path.
189 std::string
path(url
.path());
190 return IsPathHomePageBase(path
) || StartsWithASCII(path
, "/ig", false);
193 bool IsGoogleSearchUrl(const GURL
& url
) {
194 // First check to see if this has a Google domain.
195 if (!IsGoogleDomainUrl(url
, DISALLOW_SUBDOMAIN
, DISALLOW_NON_STANDARD_PORTS
))
198 // Make sure the path is a known search path.
199 std::string
path(url
.path());
200 bool is_home_page_base
= IsPathHomePageBase(path
);
201 if (!is_home_page_base
&& (path
!= "/search"))
204 // Check for query parameter in URL parameter and hash fragment, depending on
206 return HasGoogleSearchQueryParam(url
.ref()) ||
207 (!is_home_page_base
&& HasGoogleSearchQueryParam(url
.query()));
210 bool IsYoutubeDomainUrl(const GURL
& url
,
211 SubdomainPermission subdomain_permission
,
212 PortPermission port_permission
) {
213 return IsValidURL(url
, port_permission
) &&
214 IsValidHostName(url
.host(), "youtube", subdomain_permission
);
217 } // namespace google_util