chrome/browser/safe_browsing/client_side_detection_host.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
   6
   7 #include <vector>
   8
   9 #include "base/logging.h"
  10 #include "base/memory/ref_counted.h"
  11 #include "base/memory/scoped_ptr.h"
  12 #include "base/metrics/histogram.h"
  13 #include "base/prefs/pref_service.h"
  14 #include "base/sequenced_task_runner_helpers.h"
  15 #include "base/strings/utf_string_conversions.h"
  16 #include "chrome/browser/browser_process.h"
  17 #include "chrome/browser/profiles/profile.h"
  18 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
  19 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
  20 #include "chrome/browser/safe_browsing/database_manager.h"
  21 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
  22 #include "chrome/common/pref_names.h"
  23 #include "chrome/common/safe_browsing/csd.pb.h"
  24 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
  25 #include "content/public/browser/browser_thread.h"
  26 #include "content/public/browser/navigation_controller.h"
  27 #include "content/public/browser/navigation_details.h"
  28 #include "content/public/browser/navigation_entry.h"
  29 #include "content/public/browser/notification_details.h"
  30 #include "content/public/browser/notification_source.h"
  31 #include "content/public/browser/notification_types.h"
  32 #include "content/public/browser/render_process_host.h"
  33 #include "content/public/browser/render_view_host.h"
  34 #include "content/public/browser/resource_request_details.h"
  35 #include "content/public/browser/web_contents.h"
  36 #include "content/public/common/frame_navigate_params.h"
  37 #include "content/public/common/url_constants.h"
  38 #include "url/gurl.h"
  39
  40 using content::BrowserThread;
  41 using content::NavigationEntry;
  42 using content::ResourceRequestDetails;
  43 using content::ResourceType;
  44 using content::WebContents;
  45
  46 namespace safe_browsing {
  47
  48 const size_t ClientSideDetectionHost::kMaxUrlsPerIP = 20;
  49 const size_t ClientSideDetectionHost::kMaxIPsPerBrowse = 200;
  50
  51 const char kSafeBrowsingMatchKey[] = "safe_browsing_match";
  52
  53 typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;
  54
  55 // This class is instantiated each time a new toplevel URL loads, and
  56 // asynchronously checks whether the malware and phishing classifiers should run
  57 // for this URL.  If so, it notifies the host class by calling the provided
  58 // callback form the UI thread.  Objects of this class are ref-counted and will
  59 // be destroyed once nobody uses it anymore.  If |web_contents|, |csd_service|
  60 // or |host| go away you need to call Cancel().  We keep the |database_manager|
  61 // alive in a ref pointer for as long as it takes.
  62 class ClientSideDetectionHost::ShouldClassifyUrlRequest
  63     : public base::RefCountedThreadSafe<
  64           ClientSideDetectionHost::ShouldClassifyUrlRequest> {
  65  public:
  66   ShouldClassifyUrlRequest(
  67       const content::FrameNavigateParams& params,
  68       const ShouldClassifyUrlCallback& start_phishing_classification,
  69       const ShouldClassifyUrlCallback& start_malware_classification,
  70       WebContents* web_contents,
  71       ClientSideDetectionService* csd_service,
  72       SafeBrowsingDatabaseManager* database_manager,
  73       ClientSideDetectionHost* host)
  74       : params_(params),
  75         web_contents_(web_contents),
  76         csd_service_(csd_service),
  77         database_manager_(database_manager),
  78         host_(host),
  79         start_phishing_classification_cb_(start_phishing_classification),
  80         start_malware_classification_cb_(start_malware_classification) {
  81     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  82     DCHECK(web_contents_);
  83     DCHECK(csd_service_);
  84     DCHECK(database_manager_.get());
  85     DCHECK(host_);
  86   }
  87
  88   void Start() {
  89     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  90
  91     // We start by doing some simple checks that can run on the UI thread.
  92     UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ClassificationStart", 1);
  93     UMA_HISTOGRAM_BOOLEAN("SBClientMalware.ClassificationStart", 1);
  94
  95     // Only classify [X]HTML documents.
  96     if (params_.contents_mime_type != "text/html" &&
  97         params_.contents_mime_type != "application/xhtml+xml") {
  98       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
  99               << " because it has an unsupported MIME type: "
 100               << params_.contents_mime_type;
 101       DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);
 102     }
 103
 104     if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
 105       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
 106               << " because of hosting on private IP: "
 107               << params_.socket_address.host();
 108       DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);
 109       DontClassifyForMalware(NO_CLASSIFY_PRIVATE_IP);
 110     }
 111
 112     // For phishing we only classify HTTP pages.
 113     if (!params_.url.SchemeIs(url::kHttpScheme)) {
 114       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
 115               << " because it is not HTTP: "
 116               << params_.socket_address.host();
 117       DontClassifyForPhishing(NO_CLASSIFY_NOT_HTTP_URL);
 118     }
 119
 120     // Don't run any classifier if the tab is incognito.
 121     if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
 122       VLOG(1) << "Skipping phishing and malware classification for URL: "
 123               << params_.url << " because we're browsing incognito.";
 124       DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);
 125       DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);
 126     }
 127
 128     // We lookup the csd-whitelist before we lookup the cache because
 129     // a URL may have recently been whitelisted.  If the URL matches
 130     // the csd-whitelist we won't start phishing classification.  The
 131     // csd-whitelist check has to be done on the IO thread because it
 132     // uses the SafeBrowsing service class.
 133     if (ShouldClassifyForPhishing() || ShouldClassifyForMalware()) {
 134       BrowserThread::PostTask(
 135           BrowserThread::IO,
 136           FROM_HERE,
 137           base::Bind(&ShouldClassifyUrlRequest::CheckSafeBrowsingDatabase,
 138                      this, params_.url));
 139     }
 140   }
 141
 142   void Cancel() {
 143     DontClassifyForPhishing(NO_CLASSIFY_CANCEL);
 144     DontClassifyForMalware(NO_CLASSIFY_CANCEL);
 145     // Just to make sure we don't do anything stupid we reset all these
 146     // pointers except for the safebrowsing service class which may be
 147     // accessed by CheckSafeBrowsingDatabase().
 148     web_contents_ = NULL;
 149     csd_service_ = NULL;
 150     host_ = NULL;
 151   }
 152
 153  private:
 154   friend class base::RefCountedThreadSafe<
 155       ClientSideDetectionHost::ShouldClassifyUrlRequest>;
 156
 157   // Enum used to keep stats about why the pre-classification check failed.
 158   enum PreClassificationCheckFailures {
 159     OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
 160     NO_CLASSIFY_PRIVATE_IP,
 161     NO_CLASSIFY_OFF_THE_RECORD,
 162     NO_CLASSIFY_MATCH_CSD_WHITELIST,
 163     NO_CLASSIFY_TOO_MANY_REPORTS,
 164     NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
 165     NO_CLASSIFY_NO_DATABASE_MANAGER,
 166     NO_CLASSIFY_KILLSWITCH,
 167     NO_CLASSIFY_CANCEL,
 168     NO_CLASSIFY_RESULT_FROM_CACHE,
 169     NO_CLASSIFY_NOT_HTTP_URL,
 170
 171     NO_CLASSIFY_MAX  // Always add new values before this one.
 172   };
 173
 174   // The destructor can be called either from the UI or the IO thread.
 175   virtual ~ShouldClassifyUrlRequest() { }
 176
 177   bool ShouldClassifyForPhishing() const {
 178     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 179     return !start_phishing_classification_cb_.is_null();
 180   }
 181
 182   bool ShouldClassifyForMalware() const {
 183     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 184     return !start_malware_classification_cb_.is_null();
 185   }
 186
 187   void DontClassifyForPhishing(PreClassificationCheckFailures reason) {
 188     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 189     if (ShouldClassifyForPhishing()) {
 190       // Track the first reason why we stopped classifying for phishing.
 191       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
 192                                 reason, NO_CLASSIFY_MAX);
 193       DVLOG(2) << "Failed phishing pre-classification checks.  Reason: "
 194                << reason;
 195       start_phishing_classification_cb_.Run(false);
 196     }
 197     start_phishing_classification_cb_.Reset();
 198   }
 199
 200   void DontClassifyForMalware(PreClassificationCheckFailures reason) {
 201     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 202     if (ShouldClassifyForMalware()) {
 203       // Track the first reason why we stopped classifying for malware.
 204       UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",
 205                                 reason, NO_CLASSIFY_MAX);
 206       DVLOG(2) << "Failed malware pre-classification checks.  Reason: "
 207                << reason;
 208       start_malware_classification_cb_.Run(false);
 209     }
 210     start_malware_classification_cb_.Reset();
 211   }
 212
 213   void CheckSafeBrowsingDatabase(const GURL& url) {
 214     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 215     // We don't want to call the classification callbacks from the IO
 216     // thread so we simply pass the results of this method to CheckCache()
 217     // which is called on the UI thread;
 218     PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;
 219     PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;
 220     if (!database_manager_.get()) {
 221       // We cannot check the Safe Browsing whitelists so we stop here
 222       // for safety.
 223       malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;
 224     } else {
 225       if (database_manager_->MatchCsdWhitelistUrl(url)) {
 226         VLOG(1) << "Skipping phishing classification for URL: " << url
 227                 << " because it matches the csd whitelist";
 228         phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;
 229       }
 230       if (database_manager_->IsMalwareKillSwitchOn()) {
 231         malware_reason = NO_CLASSIFY_KILLSWITCH;
 232       }
 233     }
 234     BrowserThread::PostTask(
 235         BrowserThread::UI,
 236         FROM_HERE,
 237         base::Bind(&ShouldClassifyUrlRequest::CheckCache,
 238                    this,
 239                    phishing_reason,
 240                    malware_reason));
 241   }
 242
 243   void CheckCache(PreClassificationCheckFailures phishing_reason,
 244                   PreClassificationCheckFailures malware_reason) {
 245     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 246     if (phishing_reason != NO_CLASSIFY_MAX)
 247       DontClassifyForPhishing(phishing_reason);
 248     if (malware_reason != NO_CLASSIFY_MAX)
 249       DontClassifyForMalware(malware_reason);
 250     if (!ShouldClassifyForMalware() && !ShouldClassifyForPhishing()) {
 251       return;  // No point in doing anything else.
 252     }
 253     // If result is cached, we don't want to run classification again.
 254     // In that case we're just trying to show the warning.
 255     bool is_phishing;
 256     if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
 257       VLOG(1) << "Satisfying request for " << params_.url << " from cache";
 258       UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.RequestSatisfiedFromCache", 1);
 259       // Since we are already on the UI thread, this is safe.
 260       host_->MaybeShowPhishingWarning(params_.url, is_phishing);
 261       DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);
 262     }
 263
 264     // We want to limit the number of requests, though we will ignore the
 265     // limit for urls in the cache.  We don't want to start classifying
 266     // too many pages as phishing, but for those that we already think are
 267     // phishing we want to send a request to the server to give ourselves
 268     // a chance to fix misclassifications.
 269     if (csd_service_->IsInCache(params_.url)) {
 270       VLOG(1) << "Reporting limit skipped for " << params_.url
 271               << " as it was in the cache.";
 272       UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ReportLimitSkipped", 1);
 273     } else if (csd_service_->OverPhishingReportLimit()) {
 274       VLOG(1) << "Too many report phishing requests sent recently, "
 275               << "not running classification for " << params_.url;
 276       DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);
 277     }
 278     if (csd_service_->OverMalwareReportLimit()) {
 279       DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);
 280     }
 281
 282     // Everything checks out, so start classification.
 283     // |web_contents_| is safe to call as we will be destructed
 284     // before it is.
 285     if (ShouldClassifyForPhishing()) {
 286       start_phishing_classification_cb_.Run(true);
 287       // Reset the callback to make sure ShouldClassifyForPhishing()
 288       // returns false.
 289       start_phishing_classification_cb_.Reset();
 290     }
 291     if (ShouldClassifyForMalware()) {
 292       start_malware_classification_cb_.Run(true);
 293       // Reset the callback to make sure ShouldClassifyForMalware()
 294       // returns false.
 295       start_malware_classification_cb_.Reset();
 296     }
 297   }
 298
 299   content::FrameNavigateParams params_;
 300   WebContents* web_contents_;
 301   ClientSideDetectionService* csd_service_;
 302   // We keep a ref pointer here just to make sure the safe browsing
 303   // database manager stays alive long enough.
 304   scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
 305   ClientSideDetectionHost* host_;
 306
 307   ShouldClassifyUrlCallback start_phishing_classification_cb_;
 308   ShouldClassifyUrlCallback start_malware_classification_cb_;
 309
 310   DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
 311 };
 312
 313 // static
 314 ClientSideDetectionHost* ClientSideDetectionHost::Create(
 315     WebContents* tab) {
 316   return new ClientSideDetectionHost(tab);
 317 }
 318
 319 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
 320     : content::WebContentsObserver(tab),
 321       csd_service_(NULL),
 322       classification_request_(NULL),
 323       should_extract_malware_features_(true),
 324       should_classify_for_malware_(false),
 325       pageload_complete_(false),
 326       unsafe_unique_page_id_(-1),
 327       weak_factory_(this) {
 328   DCHECK(tab);
 329   // Note: csd_service_ and sb_service will be NULL here in testing.
 330   csd_service_ = g_browser_process->safe_browsing_detection_service();
 331   feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));
 332   registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,
 333                  content::Source<WebContents>(tab));
 334
 335   scoped_refptr<SafeBrowsingService> sb_service =
 336       g_browser_process->safe_browsing_service();
 337   if (sb_service.get()) {
 338     ui_manager_ = sb_service->ui_manager();
 339     database_manager_ = sb_service->database_manager();
 340     ui_manager_->AddObserver(this);
 341   }
 342 }
 343
 344 ClientSideDetectionHost::~ClientSideDetectionHost() {
 345   if (ui_manager_.get())
 346     ui_manager_->RemoveObserver(this);
 347 }
 348
 349 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
 350   bool handled = true;
 351   IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
 352     IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
 353                         OnPhishingDetectionDone)
 354     IPC_MESSAGE_UNHANDLED(handled = false)
 355   IPC_END_MESSAGE_MAP()
 356   return handled;
 357 }
 358
 359 void ClientSideDetectionHost::DidNavigateMainFrame(
 360     const content::LoadCommittedDetails& details,
 361     const content::FrameNavigateParams& params) {
 362   // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
 363   // that don't call this method on the UI thread.
 364   // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 365   if (details.is_in_page) {
 366     // If the navigation is within the same page, the user isn't really
 367     // navigating away.  We don't need to cancel a pending callback or
 368     // begin a new classification.
 369     return;
 370   }
 371   // Cancel any pending classification request.
 372   if (classification_request_.get()) {
 373     classification_request_->Cancel();
 374   }
 375   // If we navigate away and there currently is a pending phishing
 376   // report request we have to cancel it to make sure we don't display
 377   // an interstitial for the wrong page.  Note that this won't cancel
 378   // the server ping back but only cancel the showing of the
 379   // interstial.
 380   weak_factory_.InvalidateWeakPtrs();
 381
 382   if (!csd_service_) {
 383     return;
 384   }
 385   browse_info_.reset(new BrowseInfo);
 386
 387   // Store redirect chain information.
 388   if (params.url.host() != cur_host_) {
 389     cur_host_ = params.url.host();
 390     cur_host_redirects_ = params.redirects;
 391   }
 392   browse_info_->url = params.url;
 393   browse_info_->host_redirects = cur_host_redirects_;
 394   browse_info_->url_redirects = params.redirects;
 395   browse_info_->referrer = params.referrer.url;
 396   browse_info_->http_status_code = details.http_status_code;
 397   browse_info_->page_id = params.page_id;
 398
 399   should_extract_malware_features_ = true;
 400   should_classify_for_malware_ = false;
 401   pageload_complete_ = false;
 402
 403   // Check whether we can cassify the current URL for phishing or malware.
 404   classification_request_ = new ShouldClassifyUrlRequest(
 405       params,
 406       base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,
 407                  weak_factory_.GetWeakPtr()),
 408       base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,
 409                  weak_factory_.GetWeakPtr()),
 410       web_contents(), csd_service_, database_manager_.get(), this);
 411   classification_request_->Start();
 412 }
 413
 414 void ClientSideDetectionHost::OnSafeBrowsingHit(
 415     const SafeBrowsingUIManager::UnsafeResource& resource) {
 416   if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
 417     return;
 418
 419   // Check that the hit is either malware or phishing.
 420   if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&
 421       resource.threat_type != SB_THREAT_TYPE_URL_MALWARE)
 422     return;
 423
 424   // Check that this notification is really for us.
 425   content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
 426       resource.render_process_host_id, resource.render_view_id);
 427   if (!hit_rvh ||
 428       web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
 429     return;
 430
 431   // Store the unique page ID for later.
 432   unsafe_unique_page_id_ =
 433       web_contents()->GetController().GetActiveEntry()->GetUniqueID();
 434
 435   // We also keep the resource around in order to be able to send the
 436   // malicious URL to the server.
 437   unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource));
 438   unsafe_resource_->callback.Reset();  // Don't do anything stupid.
 439 }
 440
 441 void ClientSideDetectionHost::OnSafeBrowsingMatch(
 442     const SafeBrowsingUIManager::UnsafeResource& resource) {
 443   if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
 444     return;
 445
 446   // Check that this notification is really for us.
 447   content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
 448       resource.render_process_host_id, resource.render_view_id);
 449   if (!hit_rvh ||
 450       web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
 451     return;
 452
 453   web_contents()->GetController().GetActiveEntry()->SetExtraData(
 454       kSafeBrowsingMatchKey, base::ASCIIToUTF16("1"));
 455 }
 456
 457 scoped_refptr<SafeBrowsingDatabaseManager>
 458 ClientSideDetectionHost::database_manager() {
 459   return database_manager_;
 460 }
 461
 462 bool ClientSideDetectionHost::DidPageReceiveSafeBrowsingMatch() const {
 463   if (!web_contents() || !web_contents()->GetController().GetVisibleEntry())
 464     return false;
 465
 466   // If an interstitial page is showing, GetVisibleEntry will return the
 467   // transient NavigationEntry for the interstitial. The transient entry
 468   // will not have the flag set, so use the pending entry instead if there
 469   // is one.
 470   NavigationEntry* entry = web_contents()->GetController().GetPendingEntry();
 471   if (!entry) {
 472     entry = web_contents()->GetController().GetVisibleEntry();
 473     if (entry->GetPageType() == content::PAGE_TYPE_INTERSTITIAL)
 474       entry = web_contents()->GetController().GetLastCommittedEntry();
 475     if (!entry)
 476       return false;
 477   }
 478
 479   base::string16 value;
 480   return entry->GetExtraData(kSafeBrowsingMatchKey, &value);
 481 }
 482
 483 void ClientSideDetectionHost::WebContentsDestroyed() {
 484   // Tell any pending classification request that it is being canceled.
 485   if (classification_request_.get()) {
 486     classification_request_->Cancel();
 487   }
 488   // Cancel all pending feature extractions.
 489   feature_extractor_.reset();
 490 }
 491
 492 void ClientSideDetectionHost::OnPhishingPreClassificationDone(
 493     bool should_classify) {
 494   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 495   if (browse_info_.get() && should_classify) {
 496     VLOG(1) << "Instruct renderer to start phishing detection for URL: "
 497             << browse_info_->url;
 498     content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();
 499     rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
 500         rvh->GetRoutingID(), browse_info_->url));
 501   }
 502 }
 503
 504 void ClientSideDetectionHost::OnMalwarePreClassificationDone(
 505     bool should_classify) {
 506   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 507   // If classification checks failed we should stop extracting malware features.
 508   DVLOG(2) << "Malware pre-classification checks done. Should classify: "
 509            << should_classify;
 510   should_extract_malware_features_ = should_classify;
 511   should_classify_for_malware_ = should_classify;
 512   MaybeStartMalwareFeatureExtraction();
 513 }
 514
 515 void ClientSideDetectionHost::DidStopLoading(content::RenderViewHost* rvh) {
 516   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 517   if (!csd_service_ || !browse_info_.get())
 518     return;
 519   DVLOG(2) << "Page finished loading.";
 520   pageload_complete_ = true;
 521   MaybeStartMalwareFeatureExtraction();
 522 }
 523
 524 void ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {
 525   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 526   if (csd_service_ && browse_info_.get() &&
 527       should_classify_for_malware_ &&
 528       pageload_complete_) {
 529     scoped_ptr<ClientMalwareRequest> malware_request(
 530         new ClientMalwareRequest);
 531     // Start browser-side malware feature extraction.  Once we're done it will
 532     // send the malware client verdict request.
 533     malware_request->set_url(browse_info_->url.spec());
 534     const GURL& referrer = browse_info_->referrer;
 535     if (referrer.SchemeIs("http")) {  // Only send http urls.
 536       malware_request->set_referrer_url(referrer.spec());
 537     }
 538     // This function doesn't expect browse_info_ to stay around after this
 539     // function returns.
 540     feature_extractor_->ExtractMalwareFeatures(
 541         browse_info_.get(),
 542         malware_request.release(),
 543         base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,
 544                    weak_factory_.GetWeakPtr()));
 545     should_classify_for_malware_ = false;
 546   }
 547 }
 548
 549 void ClientSideDetectionHost::OnPhishingDetectionDone(
 550     const std::string& verdict_str) {
 551   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 552   // There is something seriously wrong if there is no service class but
 553   // this method is called.  The renderer should not start phishing detection
 554   // if there isn't any service class in the browser.
 555   DCHECK(csd_service_);
 556   DCHECK(browse_info_.get());
 557
 558   // We parse the protocol buffer here.  If we're unable to parse it we won't
 559   // send the verdict further.
 560   scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
 561   if (csd_service_ &&
 562       browse_info_.get() &&
 563       verdict->ParseFromString(verdict_str) &&
 564       verdict->IsInitialized()) {
 565     // We only send phishing verdict to the server if the verdict is phishing or
 566     // if a SafeBrowsing interstitial was already shown for this site.  E.g., a
 567     // malware or phishing interstitial was shown but the user clicked
 568     // through.
 569     if (verdict->is_phishing() || DidShowSBInterstitial()) {
 570       if (DidShowSBInterstitial()) {
 571         browse_info_->unsafe_resource.reset(unsafe_resource_.release());
 572       }
 573       // Start browser-side feature extraction.  Once we're done it will send
 574       // the client verdict request.
 575       feature_extractor_->ExtractFeatures(
 576           browse_info_.get(),
 577           verdict.release(),
 578           base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,
 579                      weak_factory_.GetWeakPtr()));
 580     }
 581   }
 582 }
 583
 584 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
 585                                                        bool is_phishing) {
 586   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 587   DVLOG(2) << "Received server phishing verdict for URL:" << phishing_url
 588            << " is_phishing:" << is_phishing;
 589   if (is_phishing) {
 590     DCHECK(web_contents());
 591     if (ui_manager_.get()) {
 592       SafeBrowsingUIManager::UnsafeResource resource;
 593       resource.url = phishing_url;
 594       resource.original_url = phishing_url;
 595       resource.is_subresource = false;
 596       resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL;
 597       resource.render_process_host_id =
 598           web_contents()->GetRenderProcessHost()->GetID();
 599       resource.render_view_id =
 600           web_contents()->GetRenderViewHost()->GetRoutingID();
 601       if (!ui_manager_->IsWhitelisted(resource)) {
 602         // We need to stop any pending navigations, otherwise the interstital
 603         // might not get created properly.
 604         web_contents()->GetController().DiscardNonCommittedEntries();
 605       }
 606       ui_manager_->DisplayBlockingPage(resource);
 607     }
 608     // If there is true phishing verdict, invalidate weakptr so that no longer
 609     // consider the malware vedict.
 610     weak_factory_.InvalidateWeakPtrs();
 611   }
 612 }
 613
 614 void ClientSideDetectionHost::MaybeShowMalwareWarning(GURL original_url,
 615                                                       GURL malware_url,
 616                                                       bool is_malware) {
 617   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 618   DVLOG(2) << "Received server malawre IP verdict for URL:" << malware_url
 619            << " is_malware:" << is_malware;
 620   if (is_malware && malware_url.is_valid() && original_url.is_valid()) {
 621     DCHECK(web_contents());
 622     if (ui_manager_.get()) {
 623       SafeBrowsingUIManager::UnsafeResource resource;
 624       resource.url = malware_url;
 625       resource.original_url = original_url;
 626       resource.is_subresource = (malware_url.host() != original_url.host());
 627       resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL;
 628       resource.render_process_host_id =
 629           web_contents()->GetRenderProcessHost()->GetID();
 630       resource.render_view_id =
 631           web_contents()->GetRenderViewHost()->GetRoutingID();
 632       if (!ui_manager_->IsWhitelisted(resource)) {
 633         // We need to stop any pending navigations, otherwise the interstital
 634         // might not get created properly.
 635         web_contents()->GetController().DiscardNonCommittedEntries();
 636       }
 637       ui_manager_->DisplayBlockingPage(resource);
 638     }
 639     // If there is true malware verdict, invalidate weakptr so that no longer
 640     // consider the phishing vedict.
 641     weak_factory_.InvalidateWeakPtrs();
 642   }
 643 }
 644
 645 void ClientSideDetectionHost::FeatureExtractionDone(
 646     bool success,
 647     scoped_ptr<ClientPhishingRequest> request) {
 648   DCHECK(request);
 649   DVLOG(2) << "Feature extraction done (success:" << success << ") for URL: "
 650            << request->url() << ". Start sending client phishing request.";
 651   ClientSideDetectionService::ClientReportPhishingRequestCallback callback;
 652   // If the client-side verdict isn't phishing we don't care about the server
 653   // response because we aren't going to display a warning.
 654   if (request->is_phishing()) {
 655     callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning,
 656                           weak_factory_.GetWeakPtr());
 657   }
 658   // Send ping even if the browser feature extraction failed.
 659   csd_service_->SendClientReportPhishingRequest(
 660       request.release(),  // The service takes ownership of the request object.
 661       callback);
 662 }
 663
 664 void ClientSideDetectionHost::MalwareFeatureExtractionDone(
 665     bool feature_extraction_success,
 666     scoped_ptr<ClientMalwareRequest> request) {
 667   DCHECK(request.get());
 668   DVLOG(2) << "Malware Feature extraction done for URL: " << request->url()
 669            << ", with badip url count:" << request->bad_ip_url_info_size();
 670
 671   // Send ping if there is matching features.
 672   if (feature_extraction_success && request->bad_ip_url_info_size() > 0) {
 673     VLOG(1) << "Start sending client malware request.";
 674     ClientSideDetectionService::ClientReportMalwareRequestCallback callback;
 675     callback = base::Bind(&ClientSideDetectionHost::MaybeShowMalwareWarning,
 676                           weak_factory_.GetWeakPtr());
 677     csd_service_->SendClientReportMalwareRequest(request.release(), callback);
 678   }
 679 }
 680
 681 void ClientSideDetectionHost::UpdateIPUrlMap(const std::string& ip,
 682                                              const std::string& url,
 683                                              const std::string& method,
 684                                              const std::string& referrer,
 685                                              const ResourceType resource_type) {
 686   if (ip.empty() || url.empty())
 687     return;
 688
 689   IPUrlMap::iterator it = browse_info_->ips.find(ip);
 690   if (it == browse_info_->ips.end()) {
 691     if (browse_info_->ips.size() < kMaxIPsPerBrowse) {
 692       std::vector<IPUrlInfo> url_infos;
 693       url_infos.push_back(IPUrlInfo(url, method, referrer, resource_type));
 694       browse_info_->ips.insert(make_pair(ip, url_infos));
 695     }
 696   } else if (it->second.size() < kMaxUrlsPerIP) {
 697     it->second.push_back(IPUrlInfo(url, method, referrer, resource_type));
 698   }
 699 }
 700
 701 void ClientSideDetectionHost::Observe(
 702     int type,
 703     const content::NotificationSource& source,
 704     const content::NotificationDetails& details) {
 705   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 706   DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);
 707   const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(
 708       details).ptr();
 709   if (req && browse_info_.get() &&
 710       should_extract_malware_features_ && req->url.is_valid()) {
 711     UpdateIPUrlMap(req->socket_address.host() /* ip */,
 712                    req->url.spec()  /* url */,
 713                    req->method,
 714                    req->referrer,
 715                    req->resource_type);
 716   }
 717 }
 718
 719 bool ClientSideDetectionHost::DidShowSBInterstitial() const {
 720   if (unsafe_unique_page_id_ <= 0 || !web_contents()) {
 721     return false;
 722   }
 723   const NavigationEntry* nav_entry =
 724       web_contents()->GetController().GetActiveEntry();
 725   return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);
 726 }
 727
 728 void ClientSideDetectionHost::set_client_side_detection_service(
 729     ClientSideDetectionService* service) {
 730   csd_service_ = service;
 731 }
 732
 733 void ClientSideDetectionHost::set_safe_browsing_managers(
 734     SafeBrowsingUIManager* ui_manager,
 735     SafeBrowsingDatabaseManager* database_manager) {
 736   if (ui_manager_.get())
 737     ui_manager_->RemoveObserver(this);
 738
 739   ui_manager_ = ui_manager;
 740   if (ui_manager)
 741     ui_manager_->AddObserver(this);
 742
 743   database_manager_ = database_manager;
 744 }
 745
 746 }  // namespace safe_browsing