Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / chrome / browser / safe_browsing / client_side_detection_service.cc
blob6f431e563750c2047ebd3508277ca3aa76bb9a64
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
7 #include <algorithm>
9 #include "base/bind.h"
10 #include "base/location.h"
11 #include "base/logging.h"
12 #include "base/memory/scoped_ptr.h"
13 #include "base/metrics/histogram.h"
14 #include "base/metrics/sparse_histogram.h"
15 #include "base/prefs/pref_service.h"
16 #include "base/single_thread_task_runner.h"
17 #include "base/stl_util.h"
18 #include "base/thread_task_runner_handle.h"
19 #include "base/time/time.h"
20 #include "chrome/browser/browser_process.h"
21 #include "chrome/browser/profiles/profile.h"
22 #include "chrome/common/pref_names.h"
23 #include "chrome/common/safe_browsing/client_model.pb.h"
24 #include "chrome/common/safe_browsing/csd.pb.h"
25 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
26 #include "content/public/browser/browser_thread.h"
27 #include "content/public/browser/notification_service.h"
28 #include "content/public/browser/notification_types.h"
29 #include "content/public/browser/render_process_host.h"
30 #include "crypto/sha2.h"
31 #include "google_apis/google_api_keys.h"
32 #include "net/base/escape.h"
33 #include "net/base/load_flags.h"
34 #include "net/base/net_util.h"
35 #include "net/http/http_response_headers.h"
36 #include "net/http/http_status_code.h"
37 #include "net/url_request/url_fetcher.h"
38 #include "net/url_request/url_request_context_getter.h"
39 #include "net/url_request/url_request_status.h"
40 #include "url/gurl.h"
42 using content::BrowserThread;
44 namespace safe_browsing {
46 namespace {
48 // malware report type for UMA histogram counting.
49 enum MalwareReportTypes {
50 REPORT_SENT,
51 REPORT_HIT_LIMIT,
52 REPORT_FAILED_SERIALIZATION,
54 // Always at the end
55 REPORT_RESULT_MAX
58 void UpdateEnumUMAHistogram(MalwareReportTypes report_type) {
59 DCHECK(report_type >= 0 && report_type < REPORT_RESULT_MAX);
60 UMA_HISTOGRAM_ENUMERATION("SBClientMalware.SentReports",
61 report_type, REPORT_RESULT_MAX);
64 } // namespace
66 const int ClientSideDetectionService::kInitialClientModelFetchDelayMs = 10000;
67 const int ClientSideDetectionService::kReportsIntervalDays = 1;
68 const int ClientSideDetectionService::kMaxReportsPerInterval = 3;
69 const int ClientSideDetectionService::kNegativeCacheIntervalDays = 1;
70 const int ClientSideDetectionService::kPositiveCacheIntervalMinutes = 30;
72 const char ClientSideDetectionService::kClientReportPhishingUrl[] =
73 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing";
74 const char ClientSideDetectionService::kClientReportMalwareUrl[] =
75 "https://sb-ssl.google.com/safebrowsing/clientreport/malware-check";
77 struct ClientSideDetectionService::ClientReportInfo {
78 ClientReportPhishingRequestCallback callback;
79 GURL phishing_url;
82 struct ClientSideDetectionService::ClientMalwareReportInfo {
83 ClientReportMalwareRequestCallback callback;
84 // This is the original landing url, may not be the malware url.
85 GURL original_url;
88 ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time)
89 : is_phishing(phish),
90 timestamp(time) {}
92 ClientSideDetectionService::ClientSideDetectionService(
93 net::URLRequestContextGetter* request_context_getter)
94 : enabled_(false),
95 request_context_getter_(request_context_getter),
96 weak_factory_(this) {
97 base::Closure update_renderers =
98 base::Bind(&ClientSideDetectionService::SendModelToRenderers,
99 base::Unretained(this));
100 model_loader_standard_.reset(
101 new ModelLoader(update_renderers, request_context_getter, false));
102 model_loader_extended_.reset(
103 new ModelLoader(update_renderers, request_context_getter, true));
105 registrar_.Add(this, content::NOTIFICATION_RENDERER_PROCESS_CREATED,
106 content::NotificationService::AllBrowserContextsAndSources());
109 ClientSideDetectionService::~ClientSideDetectionService() {
110 weak_factory_.InvalidateWeakPtrs();
111 STLDeleteContainerPairPointers(client_phishing_reports_.begin(),
112 client_phishing_reports_.end());
113 client_phishing_reports_.clear();
114 STLDeleteContainerPairPointers(client_malware_reports_.begin(),
115 client_malware_reports_.end());
116 client_malware_reports_.clear();
119 // static
120 ClientSideDetectionService* ClientSideDetectionService::Create(
121 net::URLRequestContextGetter* request_context_getter) {
122 DCHECK_CURRENTLY_ON(BrowserThread::UI);
123 return new ClientSideDetectionService(request_context_getter);
126 void ClientSideDetectionService::SetEnabledAndRefreshState(bool enabled) {
127 DCHECK_CURRENTLY_ON(BrowserThread::UI);
128 SendModelToRenderers(); // always refresh the renderer state
129 if (enabled == enabled_)
130 return;
131 enabled_ = enabled;
132 if (enabled_) {
133 // Refresh the models when the service is enabled. This can happen when
134 // either of the preferences are toggled, or early during startup if
135 // safe browsing is already enabled. In a lot of cases the model will be
136 // in the cache so it won't actually be fetched from the network.
137 // We delay the first model fetches to avoid slowing down browser startup.
138 model_loader_standard_->ScheduleFetch(kInitialClientModelFetchDelayMs);
139 model_loader_extended_->ScheduleFetch(kInitialClientModelFetchDelayMs);
140 } else {
141 // Cancel model loads in progress.
142 model_loader_standard_->CancelFetcher();
143 model_loader_extended_->CancelFetcher();
144 // Invoke pending callbacks with a false verdict.
145 for (std::map<const net::URLFetcher*, ClientReportInfo*>::iterator it =
146 client_phishing_reports_.begin();
147 it != client_phishing_reports_.end(); ++it) {
148 ClientReportInfo* info = it->second;
149 if (!info->callback.is_null())
150 info->callback.Run(info->phishing_url, false);
152 STLDeleteContainerPairPointers(client_phishing_reports_.begin(),
153 client_phishing_reports_.end());
154 client_phishing_reports_.clear();
155 for (std::map<const net::URLFetcher*, ClientMalwareReportInfo*>::iterator it
156 = client_malware_reports_.begin();
157 it != client_malware_reports_.end(); ++it) {
158 ClientMalwareReportInfo* info = it->second;
159 if (!info->callback.is_null())
160 info->callback.Run(info->original_url, info->original_url, false);
162 STLDeleteContainerPairPointers(client_malware_reports_.begin(),
163 client_malware_reports_.end());
164 client_malware_reports_.clear();
165 cache_.clear();
169 void ClientSideDetectionService::SendClientReportPhishingRequest(
170 ClientPhishingRequest* verdict,
171 bool is_extended_reporting,
172 const ClientReportPhishingRequestCallback& callback) {
173 DCHECK_CURRENTLY_ON(BrowserThread::UI);
174 base::ThreadTaskRunnerHandle::Get()->PostTask(
175 FROM_HERE,
176 base::Bind(&ClientSideDetectionService::StartClientReportPhishingRequest,
177 weak_factory_.GetWeakPtr(), verdict, is_extended_reporting,
178 callback));
181 void ClientSideDetectionService::SendClientReportMalwareRequest(
182 ClientMalwareRequest* verdict,
183 const ClientReportMalwareRequestCallback& callback) {
184 DCHECK_CURRENTLY_ON(BrowserThread::UI);
185 base::ThreadTaskRunnerHandle::Get()->PostTask(
186 FROM_HERE,
187 base::Bind(&ClientSideDetectionService::StartClientReportMalwareRequest,
188 weak_factory_.GetWeakPtr(), verdict, callback));
191 bool ClientSideDetectionService::IsPrivateIPAddress(
192 const std::string& ip_address) const {
193 net::IPAddressNumber ip_number;
194 if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) {
195 DVLOG(2) << "Unable to parse IP address: '" << ip_address << "'";
196 // Err on the side of safety and assume this might be private.
197 return true;
200 return net::IsIPAddressReserved(ip_number);
203 void ClientSideDetectionService::OnURLFetchComplete(
204 const net::URLFetcher* source) {
205 std::string data;
206 source->GetResponseAsString(&data);
208 if (client_phishing_reports_.find(source) != client_phishing_reports_.end()) {
209 HandlePhishingVerdict(
210 source, source->GetURL(), source->GetStatus(),
211 source->GetResponseCode(), source->GetCookies(), data);
212 } else if (client_malware_reports_.find(source) !=
213 client_malware_reports_.end()) {
214 HandleMalwareVerdict(
215 source, source->GetURL(), source->GetStatus(),
216 source->GetResponseCode(), source->GetCookies(), data);
217 } else {
218 NOTREACHED();
222 void ClientSideDetectionService::Observe(
223 int type,
224 const content::NotificationSource& source,
225 const content::NotificationDetails& details) {
226 DCHECK_CURRENTLY_ON(BrowserThread::UI);
227 DCHECK(type == content::NOTIFICATION_RENDERER_PROCESS_CREATED);
228 SendModelToProcess(
229 content::Source<content::RenderProcessHost>(source).ptr());
232 void ClientSideDetectionService::SendModelToProcess(
233 content::RenderProcessHost* process) {
234 // The ClientSideDetectionService is enabled if _any_ active profile has
235 // SafeBrowsing turned on. Here we check the profile for each renderer
236 // process and only send the model to those that have SafeBrowsing enabled,
237 // and we select the model based on the extended reporting setting.
238 Profile* profile = Profile::FromBrowserContext(process->GetBrowserContext());
239 std::string model;
240 if (profile->GetPrefs()->GetBoolean(prefs::kSafeBrowsingEnabled)) {
241 if (profile->GetPrefs()->GetBoolean(
242 prefs::kSafeBrowsingExtendedReportingEnabled)) {
243 DVLOG(2) << "Sending phishing model " << model_loader_extended_->name()
244 << " to RenderProcessHost @" << process;
245 model = model_loader_extended_->model_str();
246 } else {
247 DVLOG(2) << "Sending phishing model " << model_loader_standard_->name()
248 << " to RenderProcessHost @" << process;
249 model = model_loader_standard_->model_str();
251 } else {
252 DVLOG(2) << "Disabling client-side phishing detection for "
253 << "RenderProcessHost @" << process;
255 process->Send(new SafeBrowsingMsg_SetPhishingModel(model));
258 void ClientSideDetectionService::SendModelToRenderers() {
259 for (content::RenderProcessHost::iterator i(
260 content::RenderProcessHost::AllHostsIterator());
261 !i.IsAtEnd(); i.Advance()) {
262 SendModelToProcess(i.GetCurrentValue());
266 void ClientSideDetectionService::StartClientReportPhishingRequest(
267 ClientPhishingRequest* verdict,
268 bool is_extended_reporting,
269 const ClientReportPhishingRequestCallback& callback) {
270 DCHECK_CURRENTLY_ON(BrowserThread::UI);
271 scoped_ptr<ClientPhishingRequest> request(verdict);
273 if (!enabled_) {
274 if (!callback.is_null())
275 callback.Run(GURL(request->url()), false);
276 return;
279 // Fill in metadata about which model we used.
280 if (is_extended_reporting) {
281 request->set_model_filename(model_loader_extended_->name());
282 request->mutable_population()->set_user_population(
283 ChromeUserPopulation::EXTENDED_REPORTING);
284 } else {
285 request->set_model_filename(model_loader_standard_->name());
286 request->mutable_population()->set_user_population(
287 ChromeUserPopulation::SAFE_BROWSING);
289 DVLOG(2) << "Starting report for hit on model " << request->model_filename();
291 std::string request_data;
292 if (!request->SerializeToString(&request_data)) {
293 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1);
294 DVLOG(1) << "Unable to serialize the CSD request. Proto file changed?";
295 if (!callback.is_null())
296 callback.Run(GURL(request->url()), false);
297 return;
300 net::URLFetcher* fetcher =
301 net::URLFetcher::Create(0 /* ID used for testing */,
302 GetClientReportUrl(kClientReportPhishingUrl),
303 net::URLFetcher::POST, this).release();
305 // Remember which callback and URL correspond to the current fetcher object.
306 ClientReportInfo* info = new ClientReportInfo;
307 info->callback = callback;
308 info->phishing_url = GURL(request->url());
309 client_phishing_reports_[fetcher] = info;
311 fetcher->SetLoadFlags(net::LOAD_DISABLE_CACHE);
312 fetcher->SetRequestContext(request_context_getter_.get());
313 fetcher->SetUploadData("application/octet-stream", request_data);
314 fetcher->Start();
316 // Record that we made a request
317 phishing_report_times_.push(base::Time::Now());
320 void ClientSideDetectionService::StartClientReportMalwareRequest(
321 ClientMalwareRequest* verdict,
322 const ClientReportMalwareRequestCallback& callback) {
323 DCHECK_CURRENTLY_ON(BrowserThread::UI);
324 scoped_ptr<ClientMalwareRequest> request(verdict);
326 if (!enabled_) {
327 if (!callback.is_null())
328 callback.Run(GURL(request->url()), GURL(request->url()), false);
329 return;
332 std::string request_data;
333 if (!request->SerializeToString(&request_data)) {
334 UpdateEnumUMAHistogram(REPORT_FAILED_SERIALIZATION);
335 DVLOG(1) << "Unable to serialize the CSD request. Proto file changed?";
336 if (!callback.is_null())
337 callback.Run(GURL(request->url()), GURL(request->url()), false);
338 return;
341 net::URLFetcher* fetcher =
342 net::URLFetcher::Create(0 /* ID used for testing */,
343 GetClientReportUrl(kClientReportMalwareUrl),
344 net::URLFetcher::POST, this).release();
346 // Remember which callback and URL correspond to the current fetcher object.
347 ClientMalwareReportInfo* info = new ClientMalwareReportInfo;
348 info->callback = callback;
349 info->original_url = GURL(request->url());
350 client_malware_reports_[fetcher] = info;
352 fetcher->SetLoadFlags(net::LOAD_DISABLE_CACHE);
353 fetcher->SetRequestContext(request_context_getter_.get());
354 fetcher->SetUploadData("application/octet-stream", request_data);
355 fetcher->Start();
357 UMA_HISTOGRAM_ENUMERATION("SBClientMalware.SentReports",
358 REPORT_SENT, REPORT_RESULT_MAX);
360 UMA_HISTOGRAM_COUNTS("SBClientMalware.IPBlacklistRequestPayloadSize",
361 request_data.size());
363 // Record that we made a malware request
364 malware_report_times_.push(base::Time::Now());
368 void ClientSideDetectionService::HandlePhishingVerdict(
369 const net::URLFetcher* source,
370 const GURL& url,
371 const net::URLRequestStatus& status,
372 int response_code,
373 const net::ResponseCookies& cookies,
374 const std::string& data) {
375 ClientPhishingResponse response;
376 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]);
377 bool is_phishing = false;
378 if (status.is_success() && net::HTTP_OK == response_code &&
379 response.ParseFromString(data)) {
380 // Cache response, possibly flushing an old one.
381 cache_[info->phishing_url] =
382 make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
383 is_phishing = response.phishy();
384 } else {
385 DLOG(ERROR) << "Unable to get the server verdict for URL: "
386 << info->phishing_url << " status: " << status.status() << " "
387 << "response_code:" << response_code;
389 if (!info->callback.is_null())
390 info->callback.Run(info->phishing_url, is_phishing);
391 client_phishing_reports_.erase(source);
392 delete source;
395 void ClientSideDetectionService::HandleMalwareVerdict(
396 const net::URLFetcher* source,
397 const GURL& url,
398 const net::URLRequestStatus& status,
399 int response_code,
400 const net::ResponseCookies& cookies,
401 const std::string& data) {
402 if (status.is_success()) {
403 UMA_HISTOGRAM_SPARSE_SLOWLY(
404 "SBClientMalware.IPBlacklistRequestResponseCode", response_code);
406 // status error is negative, so we put - in front of it.
407 UMA_HISTOGRAM_SPARSE_SLOWLY(
408 "SBClientMalware.IPBlacklistRequestNetError", -status.error());
410 ClientMalwareResponse response;
411 scoped_ptr<ClientMalwareReportInfo> info(client_malware_reports_[source]);
412 bool should_blacklist = false;
413 if (status.is_success() && net::HTTP_OK == response_code &&
414 response.ParseFromString(data)) {
415 should_blacklist = response.blacklist();
416 } else {
417 DLOG(ERROR) << "Unable to get the server verdict for URL: "
418 << info->original_url << " status: " << status.status() << " "
419 << "response_code:" << response_code;
422 if (!info->callback.is_null()) {
423 if (response.has_bad_url())
424 info->callback.Run(info->original_url, GURL(response.bad_url()),
425 should_blacklist);
426 else
427 info->callback.Run(info->original_url, info->original_url, false);
430 client_malware_reports_.erase(source);
431 delete source;
434 bool ClientSideDetectionService::IsInCache(const GURL& url) {
435 UpdateCache();
437 return cache_.find(url) != cache_.end();
440 bool ClientSideDetectionService::GetValidCachedResult(const GURL& url,
441 bool* is_phishing) {
442 UpdateCache();
444 PhishingCache::iterator it = cache_.find(url);
445 if (it == cache_.end()) {
446 return false;
449 // We still need to check if the result is valid.
450 const CacheState& cache_state = *it->second;
451 if (cache_state.is_phishing ?
452 cache_state.timestamp > base::Time::Now() -
453 base::TimeDelta::FromMinutes(kPositiveCacheIntervalMinutes) :
454 cache_state.timestamp > base::Time::Now() -
455 base::TimeDelta::FromDays(kNegativeCacheIntervalDays)) {
456 *is_phishing = cache_state.is_phishing;
457 return true;
459 return false;
462 void ClientSideDetectionService::UpdateCache() {
463 // Since we limit the number of requests but allow pass-through for cache
464 // refreshes, we don't want to remove elements from the cache if they
465 // could be used for this purpose even if we will not use the entry to
466 // satisfy the request from the cache.
467 base::TimeDelta positive_cache_interval =
468 std::max(base::TimeDelta::FromMinutes(kPositiveCacheIntervalMinutes),
469 base::TimeDelta::FromDays(kReportsIntervalDays));
470 base::TimeDelta negative_cache_interval =
471 std::max(base::TimeDelta::FromDays(kNegativeCacheIntervalDays),
472 base::TimeDelta::FromDays(kReportsIntervalDays));
474 // Remove elements from the cache that will no longer be used.
475 for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) {
476 const CacheState& cache_state = *it->second;
477 if (cache_state.is_phishing ?
478 cache_state.timestamp > base::Time::Now() - positive_cache_interval :
479 cache_state.timestamp > base::Time::Now() - negative_cache_interval) {
480 ++it;
481 } else {
482 cache_.erase(it++);
487 bool ClientSideDetectionService::OverMalwareReportLimit() {
488 return GetMalwareNumReports() > kMaxReportsPerInterval;
491 bool ClientSideDetectionService::OverPhishingReportLimit() {
492 return GetPhishingNumReports() > kMaxReportsPerInterval;
495 int ClientSideDetectionService::GetMalwareNumReports() {
496 return GetNumReports(&malware_report_times_);
499 int ClientSideDetectionService::GetPhishingNumReports() {
500 return GetNumReports(&phishing_report_times_);
503 int ClientSideDetectionService::GetNumReports(
504 std::queue<base::Time>* report_times) {
505 base::Time cutoff =
506 base::Time::Now() - base::TimeDelta::FromDays(kReportsIntervalDays);
508 // Erase items older than cutoff because we will never care about them again.
509 while (!report_times->empty() &&
510 report_times->front() < cutoff) {
511 report_times->pop();
514 // Return the number of elements that are above the cutoff.
515 return report_times->size();
518 // static
519 GURL ClientSideDetectionService::GetClientReportUrl(
520 const std::string& report_url) {
521 GURL url(report_url);
522 std::string api_key = google_apis::GetAPIKey();
523 if (!api_key.empty())
524 url = url.Resolve("?key=" + net::EscapeQueryParamValue(api_key, true));
526 return url;
529 } // namespace safe_browsing