Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / chrome / browser / ssl / ssl_error_classification.cc
blobd78a1c507c62bc5e82b4ee488e4fe6e88a3e3e34
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <vector>
7 #include "chrome/browser/ssl/ssl_error_classification.h"
9 #include "base/build_time.h"
10 #include "base/metrics/field_trial.h"
11 #include "base/metrics/histogram.h"
12 #include "base/strings/string_split.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "base/time/time.h"
15 #include "chrome/browser/browser_process.h"
16 #include "chrome/browser/chrome_notification_types.h"
17 #include "chrome/browser/profiles/profile.h"
18 #include "components/ssl_errors/error_info.h"
19 #include "content/public/browser/notification_service.h"
20 #include "content/public/browser/web_contents.h"
21 #include "net/base/net_util.h"
22 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
23 #include "net/cert/x509_cert_types.h"
24 #include "net/cert/x509_certificate.h"
25 #include "url/gurl.h"
27 #if defined(ENABLE_CAPTIVE_PORTAL_DETECTION)
28 #include "chrome/browser/captive_portal/captive_portal_service.h"
29 #include "chrome/browser/captive_portal/captive_portal_service_factory.h"
30 #endif
32 #if defined(OS_WIN)
33 #include "base/win/win_util.h"
34 #include "base/win/windows_version.h"
35 #endif
37 using base::Time;
38 using base::TimeTicks;
39 using base::TimeDelta;
41 namespace {
// Likely causes of an SSL interstitial, reported to the
// "interstitial.ssl.cause.*" UMA histograms.
//
// Events for UMA. Do not reorder or change! The numeric values are written
// out explicitly so that an accidental insertion or reordering cannot silently
// renumber buckets that have already been recorded.
enum SSLInterstitialCause {
  CLOCK_PAST = 0,
  CLOCK_FUTURE = 1,
  WWW_SUBDOMAIN_MATCH = 2,
  SUBDOMAIN_MATCH = 3,
  SUBDOMAIN_INVERSE_MATCH = 4,
  SUBDOMAIN_OUTSIDE_WILDCARD = 5,
  HOST_NAME_NOT_KNOWN_TLD = 6,
  LIKELY_MULTI_TENANT_HOSTING = 7,
  LOCALHOST = 8,
  PRIVATE_URL = 9,
  AUTHORITY_ERROR_CAPTIVE_PORTAL = 10,
  SELF_SIGNED = 11,
  EXPIRED_RECENTLY = 12,
  LIKELY_SAME_DOMAIN = 13,
  // Exclusive upper bound handed to UMA_HISTOGRAM_ENUMERATION; must stay last.
  UNUSED_INTERSTITIAL_CAUSE_ENTRY,
};
// Captive portal states observed while an SSL interstitial was showing,
// reported to the "interstitial.ssl.captive_portal" UMA histogram.
//
// Events for UMA. Do not reorder or change! The numeric values are written
// out explicitly so that an accidental insertion or reordering cannot silently
// renumber buckets that have already been recorded.
enum SSLInterstitialCauseCaptivePortal {
  CAPTIVE_PORTAL_ALL = 0,
  CAPTIVE_PORTAL_DETECTION_ENABLED = 1,
  CAPTIVE_PORTAL_DETECTION_ENABLED_OVERRIDABLE = 2,
  CAPTIVE_PORTAL_PROBE_COMPLETED = 3,
  CAPTIVE_PORTAL_PROBE_COMPLETED_OVERRIDABLE = 4,
  CAPTIVE_PORTAL_NO_RESPONSE = 5,
  CAPTIVE_PORTAL_NO_RESPONSE_OVERRIDABLE = 6,
  CAPTIVE_PORTAL_DETECTED = 7,
  CAPTIVE_PORTAL_DETECTED_OVERRIDABLE = 8,
  // Exclusive upper bound handed to UMA_HISTOGRAM_ENUMERATION; must stay last.
  UNUSED_CAPTIVE_PORTAL_EVENT,
};
76 void RecordSSLInterstitialCause(bool overridable, SSLInterstitialCause event) {
77 if (overridable) {
78 UMA_HISTOGRAM_ENUMERATION("interstitial.ssl.cause.overridable", event,
79 UNUSED_INTERSTITIAL_CAUSE_ENTRY);
80 } else {
81 UMA_HISTOGRAM_ENUMERATION("interstitial.ssl.cause.nonoverridable", event,
82 UNUSED_INTERSTITIAL_CAUSE_ENTRY);
#if defined(ENABLE_CAPTIVE_PORTAL_DETECTION)
// Logs one sample of |event| to the "interstitial.ssl.captive_portal"
// histogram. Only compiled when captive portal detection is built in.
void RecordCaptivePortalEventStats(SSLInterstitialCauseCaptivePortal event) {
  UMA_HISTOGRAM_ENUMERATION(
      "interstitial.ssl.captive_portal", event, UNUSED_CAPTIVE_PORTAL_EVENT);
}
#endif
// Returns the Levenshtein (edit) distance between |str1| and |str2|: the
// minimum number of single-character insertions, deletions and substitutions
// needed to turn one string into the other. Comparison is byte-wise.
//
// Only two rows of the dynamic-programming table are kept alive at a time, so
// memory use is proportional to |str2.size()|.
int GetLevensteinDistance(const std::string& str1,
                          const std::string& str2) {
  if (str1 == str2)
    return 0;
  if (str1.empty())
    return static_cast<int>(str2.size());
  if (str2.empty())
    return static_cast<int>(str1.size());

  // |previous_row[j]| is the distance between the first i characters of |str1|
  // and the first j characters of |str2|; |current_row| is the row for i + 1.
  std::vector<int> previous_row(str2.size() + 1, 0);
  std::vector<int> current_row(str2.size() + 1, 0);

  // Row 0: turning the empty prefix into a j-character prefix costs j inserts.
  for (size_t j = 0; j < previous_row.size(); ++j)
    previous_row[j] = static_cast<int>(j);

  for (size_t i = 0; i < str1.size(); ++i) {
    // Turning an (i + 1)-character prefix into the empty prefix costs i + 1.
    current_row[0] = static_cast<int>(i + 1);
    for (size_t j = 0; j < str2.size(); ++j) {
      const int insertion = current_row[j] + 1;
      const int deletion = previous_row[j + 1] + 1;
      const int substitution =
          previous_row[j] + (str1[i] == str2[j] ? 0 : 1);
      current_row[j + 1] =
          std::min(std::min(insertion, deletion), substitution);
    }
    // The finished row becomes the "previous" row of the next iteration.
    // Swapping avoids copying it element by element.
    previous_row.swap(current_row);
  }

  // After the final swap the last computed row lives in |previous_row|.
  return previous_row[str2.size()];
}
120 // The time to use when doing build time operations in browser tests.
121 base::Time g_testing_build_time;
123 } // namespace
125 SSLErrorClassification::SSLErrorClassification(
126 content::WebContents* web_contents,
127 const base::Time& current_time,
128 const GURL& url,
129 int cert_error,
130 const net::X509Certificate& cert)
131 : web_contents_(web_contents),
132 current_time_(current_time),
133 request_url_(url),
134 cert_error_(cert_error),
135 cert_(cert),
136 captive_portal_detection_enabled_(false),
137 captive_portal_probe_completed_(false),
138 captive_portal_no_response_(false),
139 captive_portal_detected_(false) {
140 #if defined(ENABLE_CAPTIVE_PORTAL_DETECTION)
141 Profile* profile = Profile::FromBrowserContext(
142 web_contents_->GetBrowserContext());
143 captive_portal_detection_enabled_ =
144 CaptivePortalServiceFactory::GetForProfile(profile)->enabled();
145 registrar_.Add(this,
146 chrome::NOTIFICATION_CAPTIVE_PORTAL_CHECK_RESULT,
147 content::Source<Profile>(profile));
148 #endif
151 SSLErrorClassification::~SSLErrorClassification() { }
153 void SSLErrorClassification::RecordCaptivePortalUMAStatistics(
154 bool overridable) const {
155 #if defined(ENABLE_CAPTIVE_PORTAL_DETECTION)
156 RecordCaptivePortalEventStats(CAPTIVE_PORTAL_ALL);
157 if (captive_portal_detection_enabled_)
158 RecordCaptivePortalEventStats(
159 overridable ?
160 CAPTIVE_PORTAL_DETECTION_ENABLED_OVERRIDABLE :
161 CAPTIVE_PORTAL_DETECTION_ENABLED);
162 if (captive_portal_probe_completed_)
163 RecordCaptivePortalEventStats(
164 overridable ?
165 CAPTIVE_PORTAL_PROBE_COMPLETED_OVERRIDABLE :
166 CAPTIVE_PORTAL_PROBE_COMPLETED);
167 // Log only one of portal detected and no response results.
168 if (captive_portal_detected_)
169 RecordCaptivePortalEventStats(
170 overridable ?
171 CAPTIVE_PORTAL_DETECTED_OVERRIDABLE :
172 CAPTIVE_PORTAL_DETECTED);
173 else if (captive_portal_no_response_)
174 RecordCaptivePortalEventStats(
175 overridable ?
176 CAPTIVE_PORTAL_NO_RESPONSE_OVERRIDABLE :
177 CAPTIVE_PORTAL_NO_RESPONSE);
178 #endif
181 void SSLErrorClassification::RecordUMAStatistics(
182 bool overridable) const {
183 ssl_errors::ErrorInfo::ErrorType type =
184 ssl_errors::ErrorInfo::NetErrorToErrorType(cert_error_);
185 UMA_HISTOGRAM_ENUMERATION("interstitial.ssl_error_type", type,
186 ssl_errors::ErrorInfo::END_OF_ENUM);
187 switch (type) {
188 case ssl_errors::ErrorInfo::CERT_DATE_INVALID: {
189 if (IsUserClockInThePast(base::Time::NowFromSystemTime())) {
190 RecordSSLInterstitialCause(overridable, CLOCK_PAST);
191 } else if (IsUserClockInTheFuture(base::Time::NowFromSystemTime())) {
192 RecordSSLInterstitialCause(overridable, CLOCK_FUTURE);
193 } else if (cert_.HasExpired() && TimePassedSinceExpiry().InDays() < 28) {
194 RecordSSLInterstitialCause(overridable, EXPIRED_RECENTLY);
196 break;
198 case ssl_errors::ErrorInfo::CERT_COMMON_NAME_INVALID: {
199 std::string host_name = request_url_.host();
200 if (IsHostNameKnownTLD(host_name)) {
201 Tokens host_name_tokens = Tokenize(host_name);
202 if (IsWWWSubDomainMatch())
203 RecordSSLInterstitialCause(overridable, WWW_SUBDOMAIN_MATCH);
204 if (IsSubDomainOutsideWildcard(host_name_tokens))
205 RecordSSLInterstitialCause(overridable, SUBDOMAIN_OUTSIDE_WILDCARD);
206 std::vector<std::string> dns_names;
207 cert_.GetDNSNames(&dns_names);
208 std::vector<Tokens> dns_name_tokens = GetTokenizedDNSNames(dns_names);
209 if (NameUnderAnyNames(host_name_tokens, dns_name_tokens))
210 RecordSSLInterstitialCause(overridable, SUBDOMAIN_MATCH);
211 if (AnyNamesUnderName(dns_name_tokens, host_name_tokens))
212 RecordSSLInterstitialCause(overridable, SUBDOMAIN_INVERSE_MATCH);
213 if (IsCertLikelyFromMultiTenantHosting())
214 RecordSSLInterstitialCause(overridable, LIKELY_MULTI_TENANT_HOSTING);
215 if (IsCertLikelyFromSameDomain())
216 RecordSSLInterstitialCause(overridable, LIKELY_SAME_DOMAIN);
217 } else {
218 RecordSSLInterstitialCause(overridable, HOST_NAME_NOT_KNOWN_TLD);
220 break;
222 case ssl_errors::ErrorInfo::CERT_AUTHORITY_INVALID: {
223 const std::string& hostname = request_url_.HostNoBrackets();
224 if (net::IsLocalhost(hostname))
225 RecordSSLInterstitialCause(overridable, LOCALHOST);
226 if (IsHostnameNonUniqueOrDotless(hostname))
227 RecordSSLInterstitialCause(overridable, PRIVATE_URL);
228 if (captive_portal_probe_completed_ && captive_portal_detected_)
229 RecordSSLInterstitialCause(overridable, AUTHORITY_ERROR_CAPTIVE_PORTAL);
230 if (net::X509Certificate::IsSelfSigned(cert_.os_cert_handle()))
231 RecordSSLInterstitialCause(overridable, SELF_SIGNED);
232 break;
234 default:
235 break;
237 UMA_HISTOGRAM_ENUMERATION("interstitial.ssl.connection_type",
238 net::NetworkChangeNotifier::GetConnectionType(),
239 net::NetworkChangeNotifier::CONNECTION_LAST);
242 base::TimeDelta SSLErrorClassification::TimePassedSinceExpiry() const {
243 base::TimeDelta delta = current_time_ - cert_.valid_expiry();
244 return delta;
247 bool SSLErrorClassification::IsUserClockInThePast(const base::Time& time_now) {
248 base::Time build_time;
249 if (!g_testing_build_time.is_null()) {
250 build_time = g_testing_build_time;
251 } else {
252 #if defined(DONT_EMBED_BUILD_METADATA) && !defined(OFFICIAL_BUILD)
253 return false;
254 #else
255 build_time = base::GetBuildTime();
256 #endif
259 if (time_now < build_time - base::TimeDelta::FromDays(2))
260 return true;
261 return false;
264 bool SSLErrorClassification::IsUserClockInTheFuture(
265 const base::Time& time_now) {
266 base::Time build_time;
267 if (!g_testing_build_time.is_null()) {
268 build_time = g_testing_build_time;
269 } else {
270 #if defined(DONT_EMBED_BUILD_METADATA) && !defined(OFFICIAL_BUILD)
271 return false;
272 #else
273 build_time = base::GetBuildTime();
274 #endif
277 if (time_now > build_time + base::TimeDelta::FromDays(365))
278 return true;
279 return false;
282 // static
283 void SSLErrorClassification::SetBuildTimeForTesting(
284 const base::Time& testing_time) {
285 g_testing_build_time = testing_time;
288 bool SSLErrorClassification::MaybeWindowsLacksSHA256Support() {
289 #if defined(OS_WIN)
290 return !base::win::MaybeHasSHA256Support();
291 #else
292 return false;
293 #endif
296 bool SSLErrorClassification::IsHostNameKnownTLD(const std::string& host_name) {
297 size_t tld_length =
298 net::registry_controlled_domains::GetRegistryLength(
299 host_name,
300 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
301 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
302 if (tld_length == 0 || tld_length == std::string::npos)
303 return false;
304 return true;
307 std::vector<SSLErrorClassification::Tokens> SSLErrorClassification::
308 GetTokenizedDNSNames(const std::vector<std::string>& dns_names) {
309 std::vector<std::vector<std::string>> dns_name_tokens;
310 for (size_t i = 0; i < dns_names.size(); ++i) {
311 std::vector<std::string> dns_name_token_single;
312 if (dns_names[i].empty() || dns_names[i].find('\0') != std::string::npos
313 || !(IsHostNameKnownTLD(dns_names[i]))) {
314 dns_name_token_single.push_back(std::string());
315 } else {
316 dns_name_token_single = Tokenize(dns_names[i]);
318 dns_name_tokens.push_back(dns_name_token_single);
320 return dns_name_tokens;
323 size_t SSLErrorClassification::FindSubDomainDifference(
324 const Tokens& potential_subdomain, const Tokens& parent) const {
325 // A check to ensure that the number of tokens in the tokenized_parent is
326 // less than the tokenized_potential_subdomain.
327 if (parent.size() >= potential_subdomain.size())
328 return 0;
330 size_t tokens_match = 0;
331 size_t diff_size = potential_subdomain.size() - parent.size();
332 for (size_t i = 0; i < parent.size(); ++i) {
333 if (parent[i] == potential_subdomain[i + diff_size])
334 tokens_match++;
336 if (tokens_match == parent.size())
337 return diff_size;
338 return 0;
341 SSLErrorClassification::Tokens SSLErrorClassification::
342 Tokenize(const std::string& name) {
343 return base::SplitString(
344 name, ".", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
347 // We accept the inverse case for www for historical reasons.
348 bool SSLErrorClassification::GetWWWSubDomainMatch(
349 const std::string& host_name,
350 const std::vector<std::string>& dns_names,
351 std::string* www_match_host_name) {
352 if (IsHostNameKnownTLD(host_name)) {
353 // Need to account for all possible domains given in the SSL certificate.
354 for (size_t i = 0; i < dns_names.size(); ++i) {
355 if (dns_names[i].empty() ||
356 dns_names[i].find('\0') != std::string::npos ||
357 dns_names[i].length() == host_name.length() ||
358 !IsHostNameKnownTLD(dns_names[i])) {
359 continue;
360 } else if (dns_names[i].length() > host_name.length()) {
361 if (net::StripWWW(base::ASCIIToUTF16(dns_names[i])) ==
362 base::ASCIIToUTF16(host_name)) {
363 *www_match_host_name = dns_names[i];
364 return true;
366 } else {
367 if (net::StripWWW(base::ASCIIToUTF16(host_name)) ==
368 base::ASCIIToUTF16(dns_names[i])) {
369 *www_match_host_name = dns_names[i];
370 return true;
375 return false;
378 bool SSLErrorClassification::IsWWWSubDomainMatch() const {
379 const std::string& host_name = request_url_.host();
380 std::vector<std::string> dns_names;
381 cert_.GetDNSNames(&dns_names);
382 std::string www_host;
383 return GetWWWSubDomainMatch(host_name, dns_names, &www_host);
386 bool SSLErrorClassification::NameUnderAnyNames(
387 const Tokens& child,
388 const std::vector<Tokens>& potential_parents) const {
389 bool result = false;
390 // Need to account for all the possible domains given in the SSL certificate.
391 for (size_t i = 0; i < potential_parents.size(); ++i) {
392 if (potential_parents[i].empty() ||
393 potential_parents[i].size() >= child.size()) {
394 result = result || false;
395 } else {
396 size_t domain_diff = FindSubDomainDifference(child,
397 potential_parents[i]);
398 if (domain_diff == 1 && child[0] != "www")
399 result = result || true;
402 return result;
405 bool SSLErrorClassification::AnyNamesUnderName(
406 const std::vector<Tokens>& potential_children,
407 const Tokens& parent) const {
408 bool result = false;
409 // Need to account for all the possible domains given in the SSL certificate.
410 for (size_t i = 0; i < potential_children.size(); ++i) {
411 if (potential_children[i].empty() ||
412 potential_children[i].size() <= parent.size()) {
413 result = result || false;
414 } else {
415 size_t domain_diff = FindSubDomainDifference(potential_children[i],
416 parent);
417 if (domain_diff == 1 && potential_children[i][0] != "www")
418 result = result || true;
421 return result;
424 bool SSLErrorClassification::IsSubDomainOutsideWildcard(
425 const Tokens& host_name_tokens) const {
426 std::string host_name = request_url_.host();
427 std::vector<std::string> dns_names;
428 cert_.GetDNSNames(&dns_names);
429 bool result = false;
431 // This method requires that the host name be longer than the dns name on
432 // the certificate.
433 for (size_t i = 0; i < dns_names.size(); ++i) {
434 const std::string& name = dns_names[i];
435 if (name.length() < 2 || name.length() >= host_name.length() ||
436 name.find('\0') != std::string::npos ||
437 !IsHostNameKnownTLD(name)
438 || name[0] != '*' || name[1] != '.') {
439 continue;
442 // Move past the "*.".
443 std::string extracted_dns_name = name.substr(2);
444 if (FindSubDomainDifference(
445 host_name_tokens, Tokenize(extracted_dns_name)) == 2) {
446 return true;
449 return result;
452 bool SSLErrorClassification::IsCertLikelyFromMultiTenantHosting() const {
453 std::string host_name = request_url_.host();
454 std::vector<std::string> dns_names;
455 std::vector<std::string> dns_names_domain;
456 cert_.GetDNSNames(&dns_names);
457 size_t dns_names_size = dns_names.size();
459 // If there is only 1 DNS name then it is definitely not a shared certificate.
460 if (dns_names_size == 0 || dns_names_size == 1)
461 return false;
463 // Check to see if all the domains in the SAN field in the SSL certificate are
464 // the same or not.
465 for (size_t i = 0; i < dns_names_size; ++i) {
466 dns_names_domain.push_back(
467 net::registry_controlled_domains::
468 GetDomainAndRegistry(
469 dns_names[i],
470 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES));
472 for (size_t i = 1; i < dns_names_domain.size(); ++i) {
473 if (dns_names_domain[i] != dns_names_domain[0])
474 return false;
477 // If the number of DNS names is more than 5 then assume that it is a shared
478 // certificate.
479 static const int kDistinctNameThreshold = 5;
480 if (dns_names_size > kDistinctNameThreshold)
481 return true;
483 // Heuristic - The edit distance between all the strings should be at least 5
484 // for it to be counted as a shared SSLCertificate. If even one pair of
485 // strings edit distance is below 5 then the certificate is no longer
486 // considered as a shared certificate. Include the host name in the URL also
487 // while comparing.
488 dns_names.push_back(host_name);
489 static const int kMinimumEditDsitance = 5;
490 for (size_t i = 0; i < dns_names_size; ++i) {
491 for (size_t j = i + 1; j < dns_names_size; ++j) {
492 int edit_distance = GetLevensteinDistance(dns_names[i], dns_names[j]);
493 if (edit_distance < kMinimumEditDsitance)
494 return false;
497 return true;
500 bool SSLErrorClassification::IsCertLikelyFromSameDomain() const {
501 std::string host_name = request_url_.host();
502 std::vector<std::string> dns_names;
503 cert_.GetDNSNames(&dns_names);
505 dns_names.push_back(host_name);
506 std::vector<std::string> dns_names_domain;
508 for (const std::string& dns_name : dns_names) {
509 dns_names_domain.push_back(
510 net::registry_controlled_domains::GetDomainAndRegistry(
511 dns_name,
512 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES));
515 DCHECK(!dns_names_domain.empty());
516 const std::string& host_name_domain = dns_names_domain.back();
518 // Last element is the original domain. So, excluding it.
519 return std::find(dns_names_domain.begin(), dns_names_domain.end() - 1,
520 host_name_domain) != dns_names_domain.end() - 1;
523 // static
524 bool SSLErrorClassification::IsHostnameNonUniqueOrDotless(
525 const std::string& hostname) {
526 return net::IsHostnameNonUnique(hostname) ||
527 hostname.find('.') == std::string::npos;
530 void SSLErrorClassification::Observe(
531 int type,
532 const content::NotificationSource& source,
533 const content::NotificationDetails& details) {
534 #if defined(ENABLE_CAPTIVE_PORTAL_DETECTION)
535 // When detection is disabled, captive portal service always sends
536 // RESULT_INTERNET_CONNECTED. Ignore any probe results in that case.
537 if (!captive_portal_detection_enabled_)
538 return;
539 if (type == chrome::NOTIFICATION_CAPTIVE_PORTAL_CHECK_RESULT) {
540 captive_portal_probe_completed_ = true;
541 CaptivePortalService::Results* results =
542 content::Details<CaptivePortalService::Results>(details).ptr();
543 // If a captive portal was detected at any point when the interstitial was
544 // displayed, assume that the interstitial was caused by a captive portal.
545 // Example scenario:
546 // 1- Interstitial displayed and captive portal detected, setting the flag.
547 // 2- Captive portal detection automatically opens portal login page.
548 // 3- User logs in on the portal login page.
549 // A notification will be received here for RESULT_INTERNET_CONNECTED. Make
550 // sure we don't clear the captive protal flag, since the interstitial was
551 // potentially caused by the captive portal.
552 captive_portal_detected_ = captive_portal_detected_ ||
553 (results->result == captive_portal::RESULT_BEHIND_CAPTIVE_PORTAL);
554 // Also keep track of non-HTTP portals and error cases.
555 captive_portal_no_response_ = captive_portal_no_response_ ||
556 (results->result == captive_portal::RESULT_NO_RESPONSE);
558 #endif