chrome/browser/safe_browsing/client_side_detection_host.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
   6
   7 #include <vector>
   8
   9 #include "base/logging.h"
  10 #include "base/memory/ref_counted.h"
  11 #include "base/memory/scoped_ptr.h"
  12 #include "base/metrics/histogram.h"
  13 #include "base/prefs/pref_service.h"
  14 #include "base/sequenced_task_runner_helpers.h"
  15 #include "base/strings/utf_string_conversions.h"
  16 #include "chrome/browser/browser_process.h"
  17 #include "chrome/browser/profiles/profile.h"
  18 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
  19 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
  20 #include "chrome/browser/safe_browsing/database_manager.h"
  21 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
  22 #include "chrome/common/pref_names.h"
  23 #include "chrome/common/safe_browsing/csd.pb.h"
  24 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
  25 #include "content/public/browser/browser_thread.h"
  26 #include "content/public/browser/navigation_controller.h"
  27 #include "content/public/browser/navigation_details.h"
  28 #include "content/public/browser/navigation_entry.h"
  29 #include "content/public/browser/render_process_host.h"
  30 #include "content/public/browser/render_view_host.h"
  31 #include "content/public/browser/resource_request_details.h"
  32 #include "content/public/browser/web_contents.h"
  33 #include "content/public/common/frame_navigate_params.h"
  34 #include "content/public/common/url_constants.h"
  35 #include "url/gurl.h"
  36
  37 using content::BrowserThread;
  38 using content::NavigationEntry;
  39 using content::ResourceRequestDetails;
  40 using content::ResourceType;
  41 using content::WebContents;
  42
  43 namespace safe_browsing {
  44
// Upper bound on the number of URL records UpdateIPUrlMap() keeps for a
// single IP address.
const size_t ClientSideDetectionHost::kMaxUrlsPerIP = 20;
// Upper bound on the number of distinct IP addresses UpdateIPUrlMap() keeps
// in one BrowseInfo.
const size_t ClientSideDetectionHost::kMaxIPsPerBrowse = 200;

// Extra-data key that OnSafeBrowsingMatch() sets on the active
// NavigationEntry and DidPageReceiveSafeBrowsingMatch() looks up.
const char kSafeBrowsingMatchKey[] = "safe_browsing_match";

// Signature of the pre-classification callbacks: they are run with true when
// classification should start and with false when it should be skipped.
typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;
  51
  52 // This class is instantiated each time a new toplevel URL loads, and
  53 // asynchronously checks whether the malware and phishing classifiers should run
  54 // for this URL.  If so, it notifies the host class by calling the provided
  55 // callback form the UI thread.  Objects of this class are ref-counted and will
  56 // be destroyed once nobody uses it anymore.  If |web_contents|, |csd_service|
  57 // or |host| go away you need to call Cancel().  We keep the |database_manager|
  58 // alive in a ref pointer for as long as it takes.
  59 class ClientSideDetectionHost::ShouldClassifyUrlRequest
  60     : public base::RefCountedThreadSafe<
  61           ClientSideDetectionHost::ShouldClassifyUrlRequest> {
  62  public:
  63   ShouldClassifyUrlRequest(
  64       const content::FrameNavigateParams& params,
  65       const ShouldClassifyUrlCallback& start_phishing_classification,
  66       const ShouldClassifyUrlCallback& start_malware_classification,
  67       WebContents* web_contents,
  68       ClientSideDetectionService* csd_service,
  69       SafeBrowsingDatabaseManager* database_manager,
  70       ClientSideDetectionHost* host)
  71       : params_(params),
  72         web_contents_(web_contents),
  73         csd_service_(csd_service),
  74         database_manager_(database_manager),
  75         host_(host),
  76         start_phishing_classification_cb_(start_phishing_classification),
  77         start_malware_classification_cb_(start_malware_classification) {
  78     DCHECK_CURRENTLY_ON(BrowserThread::UI);
  79     DCHECK(web_contents_);
  80     DCHECK(csd_service_);
  81     DCHECK(database_manager_.get());
  82     DCHECK(host_);
  83   }
  84
  85   void Start() {
  86     DCHECK_CURRENTLY_ON(BrowserThread::UI);
  87
  88     // We start by doing some simple checks that can run on the UI thread.
  89     UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ClassificationStart", 1);
  90     UMA_HISTOGRAM_BOOLEAN("SBClientMalware.ClassificationStart", 1);
  91
  92     // Only classify [X]HTML documents.
  93     if (params_.contents_mime_type != "text/html" &&
  94         params_.contents_mime_type != "application/xhtml+xml") {
  95       DVLOG(1) << "Skipping phishing classification for URL: " << params_.url
  96                << " because it has an unsupported MIME type: "
  97                << params_.contents_mime_type;
  98       DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);
  99     }
 100
 101     if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
 102       DVLOG(1) << "Skipping phishing classification for URL: " << params_.url
 103                << " because of hosting on private IP: "
 104                << params_.socket_address.host();
 105       DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);
 106       DontClassifyForMalware(NO_CLASSIFY_PRIVATE_IP);
 107     }
 108
 109     // For phishing we only classify HTTP pages.
 110     if (!params_.url.SchemeIs(url::kHttpScheme)) {
 111       DVLOG(1) << "Skipping phishing classification for URL: " << params_.url
 112                << " because it is not HTTP: "
 113                << params_.socket_address.host();
 114       DontClassifyForPhishing(NO_CLASSIFY_NOT_HTTP_URL);
 115     }
 116
 117     // Don't run any classifier if the tab is incognito.
 118     if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
 119       DVLOG(1) << "Skipping phishing and malware classification for URL: "
 120                << params_.url << " because we're browsing incognito.";
 121       DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);
 122       DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);
 123     }
 124
 125     // We lookup the csd-whitelist before we lookup the cache because
 126     // a URL may have recently been whitelisted.  If the URL matches
 127     // the csd-whitelist we won't start phishing classification.  The
 128     // csd-whitelist check has to be done on the IO thread because it
 129     // uses the SafeBrowsing service class.
 130     if (ShouldClassifyForPhishing() || ShouldClassifyForMalware()) {
 131       BrowserThread::PostTask(
 132           BrowserThread::IO,
 133           FROM_HERE,
 134           base::Bind(&ShouldClassifyUrlRequest::CheckSafeBrowsingDatabase,
 135                      this, params_.url));
 136     }
 137   }
 138
 139   void Cancel() {
 140     DontClassifyForPhishing(NO_CLASSIFY_CANCEL);
 141     DontClassifyForMalware(NO_CLASSIFY_CANCEL);
 142     // Just to make sure we don't do anything stupid we reset all these
 143     // pointers except for the safebrowsing service class which may be
 144     // accessed by CheckSafeBrowsingDatabase().
 145     web_contents_ = NULL;
 146     csd_service_ = NULL;
 147     host_ = NULL;
 148   }
 149
 150  private:
 151   friend class base::RefCountedThreadSafe<
 152       ClientSideDetectionHost::ShouldClassifyUrlRequest>;
 153
 154   // Enum used to keep stats about why the pre-classification check failed.
 155   enum PreClassificationCheckFailures {
 156     OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
 157     NO_CLASSIFY_PRIVATE_IP,
 158     NO_CLASSIFY_OFF_THE_RECORD,
 159     NO_CLASSIFY_MATCH_CSD_WHITELIST,
 160     NO_CLASSIFY_TOO_MANY_REPORTS,
 161     NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
 162     NO_CLASSIFY_NO_DATABASE_MANAGER,
 163     NO_CLASSIFY_KILLSWITCH,
 164     NO_CLASSIFY_CANCEL,
 165     NO_CLASSIFY_RESULT_FROM_CACHE,
 166     NO_CLASSIFY_NOT_HTTP_URL,
 167
 168     NO_CLASSIFY_MAX  // Always add new values before this one.
 169   };
 170
 171   // The destructor can be called either from the UI or the IO thread.
 172   virtual ~ShouldClassifyUrlRequest() { }
 173
 174   bool ShouldClassifyForPhishing() const {
 175     DCHECK_CURRENTLY_ON(BrowserThread::UI);
 176     return !start_phishing_classification_cb_.is_null();
 177   }
 178
 179   bool ShouldClassifyForMalware() const {
 180     DCHECK_CURRENTLY_ON(BrowserThread::UI);
 181     return !start_malware_classification_cb_.is_null();
 182   }
 183
 184   void DontClassifyForPhishing(PreClassificationCheckFailures reason) {
 185     DCHECK_CURRENTLY_ON(BrowserThread::UI);
 186     if (ShouldClassifyForPhishing()) {
 187       // Track the first reason why we stopped classifying for phishing.
 188       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
 189                                 reason, NO_CLASSIFY_MAX);
 190       DVLOG(2) << "Failed phishing pre-classification checks.  Reason: "
 191                << reason;
 192       start_phishing_classification_cb_.Run(false);
 193     }
 194     start_phishing_classification_cb_.Reset();
 195   }
 196
 197   void DontClassifyForMalware(PreClassificationCheckFailures reason) {
 198     DCHECK_CURRENTLY_ON(BrowserThread::UI);
 199     if (ShouldClassifyForMalware()) {
 200       // Track the first reason why we stopped classifying for malware.
 201       UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",
 202                                 reason, NO_CLASSIFY_MAX);
 203       DVLOG(2) << "Failed malware pre-classification checks.  Reason: "
 204                << reason;
 205       start_malware_classification_cb_.Run(false);
 206     }
 207     start_malware_classification_cb_.Reset();
 208   }
 209
 210   void CheckSafeBrowsingDatabase(const GURL& url) {
 211     DCHECK_CURRENTLY_ON(BrowserThread::IO);
 212     // We don't want to call the classification callbacks from the IO
 213     // thread so we simply pass the results of this method to CheckCache()
 214     // which is called on the UI thread;
 215     PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;
 216     PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;
 217     if (!database_manager_.get()) {
 218       // We cannot check the Safe Browsing whitelists so we stop here
 219       // for safety.
 220       malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;
 221     } else {
 222       if (database_manager_->MatchCsdWhitelistUrl(url)) {
 223         DVLOG(1) << "Skipping phishing classification for URL: " << url
 224                  << " because it matches the csd whitelist";
 225         phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;
 226       }
 227       if (database_manager_->IsMalwareKillSwitchOn()) {
 228         malware_reason = NO_CLASSIFY_KILLSWITCH;
 229       }
 230     }
 231     BrowserThread::PostTask(
 232         BrowserThread::UI,
 233         FROM_HERE,
 234         base::Bind(&ShouldClassifyUrlRequest::CheckCache,
 235                    this,
 236                    phishing_reason,
 237                    malware_reason));
 238   }
 239
 240   void CheckCache(PreClassificationCheckFailures phishing_reason,
 241                   PreClassificationCheckFailures malware_reason) {
 242     DCHECK_CURRENTLY_ON(BrowserThread::UI);
 243     if (phishing_reason != NO_CLASSIFY_MAX)
 244       DontClassifyForPhishing(phishing_reason);
 245     if (malware_reason != NO_CLASSIFY_MAX)
 246       DontClassifyForMalware(malware_reason);
 247     if (!ShouldClassifyForMalware() && !ShouldClassifyForPhishing()) {
 248       return;  // No point in doing anything else.
 249     }
 250     // If result is cached, we don't want to run classification again.
 251     // In that case we're just trying to show the warning.
 252     bool is_phishing;
 253     if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
 254       DVLOG(1) << "Satisfying request for " << params_.url << " from cache";
 255       UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.RequestSatisfiedFromCache", 1);
 256       // Since we are already on the UI thread, this is safe.
 257       host_->MaybeShowPhishingWarning(params_.url, is_phishing);
 258       DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);
 259     }
 260
 261     // We want to limit the number of requests, though we will ignore the
 262     // limit for urls in the cache.  We don't want to start classifying
 263     // too many pages as phishing, but for those that we already think are
 264     // phishing we want to send a request to the server to give ourselves
 265     // a chance to fix misclassifications.
 266     if (csd_service_->IsInCache(params_.url)) {
 267       DVLOG(1) << "Reporting limit skipped for " << params_.url
 268                << " as it was in the cache.";
 269       UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ReportLimitSkipped", 1);
 270     } else if (csd_service_->OverPhishingReportLimit()) {
 271       DVLOG(1) << "Too many report phishing requests sent recently, "
 272                << "not running classification for " << params_.url;
 273       DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);
 274     }
 275     if (csd_service_->OverMalwareReportLimit()) {
 276       DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);
 277     }
 278
 279     // Everything checks out, so start classification.
 280     // |web_contents_| is safe to call as we will be destructed
 281     // before it is.
 282     if (ShouldClassifyForPhishing()) {
 283       start_phishing_classification_cb_.Run(true);
 284       // Reset the callback to make sure ShouldClassifyForPhishing()
 285       // returns false.
 286       start_phishing_classification_cb_.Reset();
 287     }
 288     if (ShouldClassifyForMalware()) {
 289       start_malware_classification_cb_.Run(true);
 290       // Reset the callback to make sure ShouldClassifyForMalware()
 291       // returns false.
 292       start_malware_classification_cb_.Reset();
 293     }
 294   }
 295
 296   content::FrameNavigateParams params_;
 297   WebContents* web_contents_;
 298   ClientSideDetectionService* csd_service_;
 299   // We keep a ref pointer here just to make sure the safe browsing
 300   // database manager stays alive long enough.
 301   scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
 302   ClientSideDetectionHost* host_;
 303
 304   ShouldClassifyUrlCallback start_phishing_classification_cb_;
 305   ShouldClassifyUrlCallback start_malware_classification_cb_;
 306
 307   DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
 308 };
 309
 310 // static
 311 ClientSideDetectionHost* ClientSideDetectionHost::Create(
 312     WebContents* tab) {
 313   return new ClientSideDetectionHost(tab);
 314 }
 315
 316 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
 317     : content::WebContentsObserver(tab),
 318       csd_service_(NULL),
 319       classification_request_(NULL),
 320       should_extract_malware_features_(true),
 321       should_classify_for_malware_(false),
 322       pageload_complete_(false),
 323       unsafe_unique_page_id_(-1),
 324       weak_factory_(this) {
 325   DCHECK(tab);
 326   // Note: csd_service_ and sb_service will be NULL here in testing.
 327   csd_service_ = g_browser_process->safe_browsing_detection_service();
 328   feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));
 329
 330   scoped_refptr<SafeBrowsingService> sb_service =
 331       g_browser_process->safe_browsing_service();
 332   if (sb_service.get()) {
 333     ui_manager_ = sb_service->ui_manager();
 334     database_manager_ = sb_service->database_manager();
 335     ui_manager_->AddObserver(this);
 336   }
 337 }
 338
 339 ClientSideDetectionHost::~ClientSideDetectionHost() {
 340   if (ui_manager_.get())
 341     ui_manager_->RemoveObserver(this);
 342 }
 343
 344 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
 345   bool handled = true;
 346   IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
 347     IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
 348                         OnPhishingDetectionDone)
 349     IPC_MESSAGE_UNHANDLED(handled = false)
 350   IPC_END_MESSAGE_MAP()
 351   return handled;
 352 }
 353
 354 void ClientSideDetectionHost::DidNavigateMainFrame(
 355     const content::LoadCommittedDetails& details,
 356     const content::FrameNavigateParams& params) {
 357   // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
 358   // that don't call this method on the UI thread.
 359   // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 360   if (details.is_in_page) {
 361     // If the navigation is within the same page, the user isn't really
 362     // navigating away.  We don't need to cancel a pending callback or
 363     // begin a new classification.
 364     return;
 365   }
 366   // Cancel any pending classification request.
 367   if (classification_request_.get()) {
 368     classification_request_->Cancel();
 369   }
 370   // If we navigate away and there currently is a pending phishing
 371   // report request we have to cancel it to make sure we don't display
 372   // an interstitial for the wrong page.  Note that this won't cancel
 373   // the server ping back but only cancel the showing of the
 374   // interstial.
 375   weak_factory_.InvalidateWeakPtrs();
 376
 377   if (!csd_service_) {
 378     return;
 379   }
 380   browse_info_.reset(new BrowseInfo);
 381
 382   // Store redirect chain information.
 383   if (params.url.host() != cur_host_) {
 384     cur_host_ = params.url.host();
 385     cur_host_redirects_ = params.redirects;
 386   }
 387   browse_info_->url = params.url;
 388   browse_info_->host_redirects = cur_host_redirects_;
 389   browse_info_->url_redirects = params.redirects;
 390   browse_info_->referrer = params.referrer.url;
 391   browse_info_->http_status_code = details.http_status_code;
 392
 393   should_extract_malware_features_ = true;
 394   should_classify_for_malware_ = false;
 395   pageload_complete_ = false;
 396
 397   // Check whether we can cassify the current URL for phishing or malware.
 398   classification_request_ = new ShouldClassifyUrlRequest(
 399       params,
 400       base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,
 401                  weak_factory_.GetWeakPtr()),
 402       base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,
 403                  weak_factory_.GetWeakPtr()),
 404       web_contents(), csd_service_, database_manager_.get(), this);
 405   classification_request_->Start();
 406 }
 407
 408 void ClientSideDetectionHost::OnSafeBrowsingHit(
 409     const SafeBrowsingUIManager::UnsafeResource& resource) {
 410   if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
 411     return;
 412
 413   // Check that the hit is either malware or phishing.
 414   if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&
 415       resource.threat_type != SB_THREAT_TYPE_URL_MALWARE)
 416     return;
 417
 418   // Check that this notification is really for us.
 419   content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
 420       resource.render_process_host_id, resource.render_view_id);
 421   if (!hit_rvh ||
 422       web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
 423     return;
 424
 425   // Store the unique page ID for later.
 426   unsafe_unique_page_id_ =
 427       web_contents()->GetController().GetActiveEntry()->GetUniqueID();
 428
 429   // We also keep the resource around in order to be able to send the
 430   // malicious URL to the server.
 431   unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource));
 432   unsafe_resource_->callback.Reset();  // Don't do anything stupid.
 433 }
 434
 435 void ClientSideDetectionHost::OnSafeBrowsingMatch(
 436     const SafeBrowsingUIManager::UnsafeResource& resource) {
 437   if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
 438     return;
 439
 440   // Check that this notification is really for us.
 441   content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
 442       resource.render_process_host_id, resource.render_view_id);
 443   if (!hit_rvh ||
 444       web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
 445     return;
 446
 447   web_contents()->GetController().GetActiveEntry()->SetExtraData(
 448       kSafeBrowsingMatchKey, base::ASCIIToUTF16("1"));
 449 }
 450
// Returns a reference to the SafeBrowsingDatabaseManager this host currently
// holds.  The result is null when none was set, e.g. when no
// SafeBrowsingService was available at construction time.
scoped_refptr<SafeBrowsingDatabaseManager>
ClientSideDetectionHost::database_manager() {
  return database_manager_;
}
 455
 456 bool ClientSideDetectionHost::DidPageReceiveSafeBrowsingMatch() const {
 457   if (!web_contents() || !web_contents()->GetController().GetVisibleEntry())
 458     return false;
 459
 460   // If an interstitial page is showing, GetVisibleEntry will return the
 461   // transient NavigationEntry for the interstitial. The transient entry
 462   // will not have the flag set, so use the pending entry instead if there
 463   // is one.
 464   NavigationEntry* entry = web_contents()->GetController().GetPendingEntry();
 465   if (!entry) {
 466     entry = web_contents()->GetController().GetVisibleEntry();
 467     if (entry->GetPageType() == content::PAGE_TYPE_INTERSTITIAL)
 468       entry = web_contents()->GetController().GetLastCommittedEntry();
 469     if (!entry)
 470       return false;
 471   }
 472
 473   base::string16 value;
 474   return entry->GetExtraData(kSafeBrowsingMatchKey, &value);
 475 }
 476
 477 void ClientSideDetectionHost::WebContentsDestroyed() {
 478   // Tell any pending classification request that it is being canceled.
 479   if (classification_request_.get()) {
 480     classification_request_->Cancel();
 481   }
 482   // Cancel all pending feature extractions.
 483   feature_extractor_.reset();
 484 }
 485
 486 void ClientSideDetectionHost::OnPhishingPreClassificationDone(
 487     bool should_classify) {
 488   DCHECK_CURRENTLY_ON(BrowserThread::UI);
 489   if (browse_info_.get() && should_classify) {
 490     DVLOG(1) << "Instruct renderer to start phishing detection for URL: "
 491              << browse_info_->url;
 492     content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();
 493     rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
 494         rvh->GetRoutingID(), browse_info_->url));
 495   }
 496 }
 497
 498 void ClientSideDetectionHost::OnMalwarePreClassificationDone(
 499     bool should_classify) {
 500   DCHECK_CURRENTLY_ON(BrowserThread::UI);
 501   // If classification checks failed we should stop extracting malware features.
 502   DVLOG(2) << "Malware pre-classification checks done. Should classify: "
 503            << should_classify;
 504   should_extract_malware_features_ = should_classify;
 505   should_classify_for_malware_ = should_classify;
 506   MaybeStartMalwareFeatureExtraction();
 507 }
 508
 509 void ClientSideDetectionHost::DidStopLoading() {
 510   DCHECK_CURRENTLY_ON(BrowserThread::UI);
 511   if (!csd_service_ || !browse_info_.get())
 512     return;
 513   DVLOG(2) << "Page finished loading.";
 514   pageload_complete_ = true;
 515   MaybeStartMalwareFeatureExtraction();
 516 }
 517
 518 void ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {
 519   DCHECK_CURRENTLY_ON(BrowserThread::UI);
 520   if (csd_service_ && browse_info_.get() &&
 521       should_classify_for_malware_ &&
 522       pageload_complete_) {
 523     scoped_ptr<ClientMalwareRequest> malware_request(
 524         new ClientMalwareRequest);
 525     // Start browser-side malware feature extraction.  Once we're done it will
 526     // send the malware client verdict request.
 527     malware_request->set_url(browse_info_->url.spec());
 528     const GURL& referrer = browse_info_->referrer;
 529     if (referrer.SchemeIs("http")) {  // Only send http urls.
 530       malware_request->set_referrer_url(referrer.spec());
 531     }
 532     // This function doesn't expect browse_info_ to stay around after this
 533     // function returns.
 534     feature_extractor_->ExtractMalwareFeatures(
 535         browse_info_.get(),
 536         malware_request.release(),
 537         base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,
 538                    weak_factory_.GetWeakPtr()));
 539     should_classify_for_malware_ = false;
 540   }
 541 }
 542
 543 void ClientSideDetectionHost::OnPhishingDetectionDone(
 544     const std::string& verdict_str) {
 545   DCHECK_CURRENTLY_ON(BrowserThread::UI);
 546   // There is something seriously wrong if there is no service class but
 547   // this method is called.  The renderer should not start phishing detection
 548   // if there isn't any service class in the browser.
 549   DCHECK(csd_service_);
 550   DCHECK(browse_info_.get());
 551
 552   // We parse the protocol buffer here.  If we're unable to parse it we won't
 553   // send the verdict further.
 554   scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
 555   if (csd_service_ &&
 556       browse_info_.get() &&
 557       verdict->ParseFromString(verdict_str) &&
 558       verdict->IsInitialized()) {
 559     // We only send phishing verdict to the server if the verdict is phishing or
 560     // if a SafeBrowsing interstitial was already shown for this site.  E.g., a
 561     // malware or phishing interstitial was shown but the user clicked
 562     // through.
 563     if (verdict->is_phishing() || DidShowSBInterstitial()) {
 564       if (DidShowSBInterstitial()) {
 565         browse_info_->unsafe_resource.reset(unsafe_resource_.release());
 566       }
 567       // Start browser-side feature extraction.  Once we're done it will send
 568       // the client verdict request.
 569       feature_extractor_->ExtractFeatures(
 570           browse_info_.get(),
 571           verdict.release(),
 572           base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,
 573                      weak_factory_.GetWeakPtr()));
 574     }
 575   }
 576 }
 577
 578 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
 579                                                        bool is_phishing) {
 580   DCHECK_CURRENTLY_ON(BrowserThread::UI);
 581   DVLOG(2) << "Received server phishing verdict for URL:" << phishing_url
 582            << " is_phishing:" << is_phishing;
 583   if (is_phishing) {
 584     DCHECK(web_contents());
 585     if (ui_manager_.get()) {
 586       SafeBrowsingUIManager::UnsafeResource resource;
 587       resource.url = phishing_url;
 588       resource.original_url = phishing_url;
 589       resource.is_subresource = false;
 590       resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL;
 591       resource.render_process_host_id =
 592           web_contents()->GetRenderProcessHost()->GetID();
 593       resource.render_view_id =
 594           web_contents()->GetRenderViewHost()->GetRoutingID();
 595       if (!ui_manager_->IsWhitelisted(resource)) {
 596         // We need to stop any pending navigations, otherwise the interstital
 597         // might not get created properly.
 598         web_contents()->GetController().DiscardNonCommittedEntries();
 599       }
 600       ui_manager_->DisplayBlockingPage(resource);
 601     }
 602     // If there is true phishing verdict, invalidate weakptr so that no longer
 603     // consider the malware vedict.
 604     weak_factory_.InvalidateWeakPtrs();
 605   }
 606 }
 607
 608 void ClientSideDetectionHost::MaybeShowMalwareWarning(GURL original_url,
 609                                                       GURL malware_url,
 610                                                       bool is_malware) {
 611   DCHECK_CURRENTLY_ON(BrowserThread::UI);
 612   DVLOG(2) << "Received server malawre IP verdict for URL:" << malware_url
 613            << " is_malware:" << is_malware;
 614   if (is_malware && malware_url.is_valid() && original_url.is_valid()) {
 615     DCHECK(web_contents());
 616     if (ui_manager_.get()) {
 617       SafeBrowsingUIManager::UnsafeResource resource;
 618       resource.url = malware_url;
 619       resource.original_url = original_url;
 620       resource.is_subresource = (malware_url.host() != original_url.host());
 621       resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL;
 622       resource.render_process_host_id =
 623           web_contents()->GetRenderProcessHost()->GetID();
 624       resource.render_view_id =
 625           web_contents()->GetRenderViewHost()->GetRoutingID();
 626       if (!ui_manager_->IsWhitelisted(resource)) {
 627         // We need to stop any pending navigations, otherwise the interstital
 628         // might not get created properly.
 629         web_contents()->GetController().DiscardNonCommittedEntries();
 630       }
 631       ui_manager_->DisplayBlockingPage(resource);
 632     }
 633     // If there is true malware verdict, invalidate weakptr so that no longer
 634     // consider the phishing vedict.
 635     weak_factory_.InvalidateWeakPtrs();
 636   }
 637 }
 638
 639 void ClientSideDetectionHost::FeatureExtractionDone(
 640     bool success,
 641     scoped_ptr<ClientPhishingRequest> request) {
 642   DCHECK(request);
 643   DVLOG(2) << "Feature extraction done (success:" << success << ") for URL: "
 644            << request->url() << ". Start sending client phishing request.";
 645   ClientSideDetectionService::ClientReportPhishingRequestCallback callback;
 646   // If the client-side verdict isn't phishing we don't care about the server
 647   // response because we aren't going to display a warning.
 648   if (request->is_phishing()) {
 649     callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning,
 650                           weak_factory_.GetWeakPtr());
 651   }
 652   Profile* profile =
 653       Profile::FromBrowserContext(web_contents()->GetBrowserContext());
 654   // Send ping even if the browser feature extraction failed.
 655   csd_service_->SendClientReportPhishingRequest(
 656       request.release(),  // The service takes ownership of the request object.
 657       profile->GetPrefs()->GetBoolean(
 658           prefs::kSafeBrowsingExtendedReportingEnabled),
 659       callback);
 660 }
 661
 662 void ClientSideDetectionHost::MalwareFeatureExtractionDone(
 663     bool feature_extraction_success,
 664     scoped_ptr<ClientMalwareRequest> request) {
 665   DCHECK(request.get());
 666   DVLOG(2) << "Malware Feature extraction done for URL: " << request->url()
 667            << ", with badip url count:" << request->bad_ip_url_info_size();
 668
 669   // Send ping if there is matching features.
 670   if (feature_extraction_success && request->bad_ip_url_info_size() > 0) {
 671     DVLOG(1) << "Start sending client malware request.";
 672     ClientSideDetectionService::ClientReportMalwareRequestCallback callback;
 673     callback = base::Bind(&ClientSideDetectionHost::MaybeShowMalwareWarning,
 674                           weak_factory_.GetWeakPtr());
 675     csd_service_->SendClientReportMalwareRequest(request.release(), callback);
 676   }
 677 }
 678
 679 void ClientSideDetectionHost::UpdateIPUrlMap(const std::string& ip,
 680                                              const std::string& url,
 681                                              const std::string& method,
 682                                              const std::string& referrer,
 683                                              const ResourceType resource_type) {
 684   if (ip.empty() || url.empty())
 685     return;
 686
 687   IPUrlMap::iterator it = browse_info_->ips.find(ip);
 688   if (it == browse_info_->ips.end()) {
 689     if (browse_info_->ips.size() < kMaxIPsPerBrowse) {
 690       std::vector<IPUrlInfo> url_infos;
 691       url_infos.push_back(IPUrlInfo(url, method, referrer, resource_type));
 692       browse_info_->ips.insert(make_pair(ip, url_infos));
 693     }
 694   } else if (it->second.size() < kMaxUrlsPerIP) {
 695     it->second.push_back(IPUrlInfo(url, method, referrer, resource_type));
 696   }
 697 }
 698
 699 void ClientSideDetectionHost::DidGetResourceResponseStart(
 700     const content::ResourceRequestDetails& details) {
 701   if (browse_info_.get() && should_extract_malware_features_ &&
 702       details.url.is_valid()) {
 703     UpdateIPUrlMap(details.socket_address.host() /* ip */,
 704                    details.url.spec() /* url */,
 705                    details.method,
 706                    details.referrer,
 707                    details.resource_type);
 708   }
 709 }
 710
 711 bool ClientSideDetectionHost::DidShowSBInterstitial() const {
 712   if (unsafe_unique_page_id_ <= 0 || !web_contents()) {
 713     return false;
 714   }
 715   const NavigationEntry* nav_entry =
 716       web_contents()->GetController().GetActiveEntry();
 717   return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);
 718 }
 719
// Replaces the ClientSideDetectionService used by this host.  |service| is
// stored as-is and may be NULL, in which case DidNavigateMainFrame() and
// DidStopLoading() return early.
void ClientSideDetectionHost::set_client_side_detection_service(
    ClientSideDetectionService* service) {
  csd_service_ = service;
}
 724
 725 void ClientSideDetectionHost::set_safe_browsing_managers(
 726     SafeBrowsingUIManager* ui_manager,
 727     SafeBrowsingDatabaseManager* database_manager) {
 728   if (ui_manager_.get())
 729     ui_manager_->RemoveObserver(this);
 730
 731   ui_manager_ = ui_manager;
 732   if (ui_manager)
 733     ui_manager_->AddObserver(this);
 734
 735   database_manager_ = database_manager;
 736 }
 737
 738 }  // namespace safe_browsing