chrome/browser/safe_browsing/client_side_detection_host.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
   6
   7 #include <vector>
   8
   9 #include "base/logging.h"
  10 #include "base/memory/ref_counted.h"
  11 #include "base/memory/scoped_ptr.h"
  12 #include "base/metrics/histogram.h"
  13 #include "base/prefs/pref_service.h"
  14 #include "base/sequenced_task_runner_helpers.h"
  15 #include "base/strings/utf_string_conversions.h"
  16 #include "chrome/browser/browser_process.h"
  17 #include "chrome/browser/profiles/profile.h"
  18 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
  19 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
  20 #include "chrome/browser/safe_browsing/database_manager.h"
  21 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
  22 #include "chrome/common/safe_browsing/csd.pb.h"
  23 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
  24 #include "content/public/browser/browser_thread.h"
  25 #include "content/public/browser/navigation_controller.h"
  26 #include "content/public/browser/navigation_details.h"
  27 #include "content/public/browser/navigation_entry.h"
  28 #include "content/public/browser/notification_details.h"
  29 #include "content/public/browser/notification_source.h"
  30 #include "content/public/browser/notification_types.h"
  31 #include "content/public/browser/render_process_host.h"
  32 #include "content/public/browser/render_view_host.h"
  33 #include "content/public/browser/resource_request_details.h"
  34 #include "content/public/browser/web_contents.h"
  35 #include "content/public/common/frame_navigate_params.h"
  36 #include "content/public/common/url_constants.h"
  37 #include "url/gurl.h"
  38
  39 using content::BrowserThread;
  40 using content::NavigationEntry;
  41 using content::ResourceRequestDetails;
  42 using content::ResourceType;
  43 using content::WebContents;
  44
  45 namespace safe_browsing {
  46
  47 const size_t ClientSideDetectionHost::kMaxUrlsPerIP = 20;
  48 const size_t ClientSideDetectionHost::kMaxIPsPerBrowse = 200;
  49
  50 const char kSafeBrowsingMatchKey[] = "safe_browsing_match";
  51
  52 typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;
  53
  54 // This class is instantiated each time a new toplevel URL loads, and
  55 // asynchronously checks whether the malware and phishing classifiers should run
  56 // for this URL.  If so, it notifies the host class by calling the provided
  57 // callback form the UI thread.  Objects of this class are ref-counted and will
  58 // be destroyed once nobody uses it anymore.  If |web_contents|, |csd_service|
  59 // or |host| go away you need to call Cancel().  We keep the |database_manager|
  60 // alive in a ref pointer for as long as it takes.
  61 class ClientSideDetectionHost::ShouldClassifyUrlRequest
  62     : public base::RefCountedThreadSafe<
  63           ClientSideDetectionHost::ShouldClassifyUrlRequest> {
  64  public:
  65   ShouldClassifyUrlRequest(
  66       const content::FrameNavigateParams& params,
  67       const ShouldClassifyUrlCallback& start_phishing_classification,
  68       const ShouldClassifyUrlCallback& start_malware_classification,
  69       WebContents* web_contents,
  70       ClientSideDetectionService* csd_service,
  71       SafeBrowsingDatabaseManager* database_manager,
  72       ClientSideDetectionHost* host)
  73       : params_(params),
  74         web_contents_(web_contents),
  75         csd_service_(csd_service),
  76         database_manager_(database_manager),
  77         host_(host),
  78         start_phishing_classification_cb_(start_phishing_classification),
  79         start_malware_classification_cb_(start_malware_classification) {
  80     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  81     DCHECK(web_contents_);
  82     DCHECK(csd_service_);
  83     DCHECK(database_manager_.get());
  84     DCHECK(host_);
  85   }
  86
  87   void Start() {
  88     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  89
  90     // We start by doing some simple checks that can run on the UI thread.
  91     UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ClassificationStart", 1);
  92     UMA_HISTOGRAM_BOOLEAN("SBClientMalware.ClassificationStart", 1);
  93
  94     // Only classify [X]HTML documents.
  95     if (params_.contents_mime_type != "text/html" &&
  96         params_.contents_mime_type != "application/xhtml+xml") {
  97       DVLOG(1) << "Skipping phishing classification for URL: " << params_.url
  98                << " because it has an unsupported MIME type: "
  99                << params_.contents_mime_type;
 100       DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);
 101     }
 102
 103     if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
 104       DVLOG(1) << "Skipping phishing classification for URL: " << params_.url
 105                << " because of hosting on private IP: "
 106                << params_.socket_address.host();
 107       DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);
 108       DontClassifyForMalware(NO_CLASSIFY_PRIVATE_IP);
 109     }
 110
 111     // For phishing we only classify HTTP pages.
 112     if (!params_.url.SchemeIs(url::kHttpScheme)) {
 113       DVLOG(1) << "Skipping phishing classification for URL: " << params_.url
 114                << " because it is not HTTP: "
 115                << params_.socket_address.host();
 116       DontClassifyForPhishing(NO_CLASSIFY_NOT_HTTP_URL);
 117     }
 118
 119     // Don't run any classifier if the tab is incognito.
 120     if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
 121       DVLOG(1) << "Skipping phishing and malware classification for URL: "
 122                << params_.url << " because we're browsing incognito.";
 123       DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);
 124       DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);
 125     }
 126
 127     // We lookup the csd-whitelist before we lookup the cache because
 128     // a URL may have recently been whitelisted.  If the URL matches
 129     // the csd-whitelist we won't start phishing classification.  The
 130     // csd-whitelist check has to be done on the IO thread because it
 131     // uses the SafeBrowsing service class.
 132     if (ShouldClassifyForPhishing() || ShouldClassifyForMalware()) {
 133       BrowserThread::PostTask(
 134           BrowserThread::IO,
 135           FROM_HERE,
 136           base::Bind(&ShouldClassifyUrlRequest::CheckSafeBrowsingDatabase,
 137                      this, params_.url));
 138     }
 139   }
 140
 141   void Cancel() {
 142     DontClassifyForPhishing(NO_CLASSIFY_CANCEL);
 143     DontClassifyForMalware(NO_CLASSIFY_CANCEL);
 144     // Just to make sure we don't do anything stupid we reset all these
 145     // pointers except for the safebrowsing service class which may be
 146     // accessed by CheckSafeBrowsingDatabase().
 147     web_contents_ = NULL;
 148     csd_service_ = NULL;
 149     host_ = NULL;
 150   }
 151
 152  private:
 153   friend class base::RefCountedThreadSafe<
 154       ClientSideDetectionHost::ShouldClassifyUrlRequest>;
 155
 156   // Enum used to keep stats about why the pre-classification check failed.
 157   enum PreClassificationCheckFailures {
 158     OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
 159     NO_CLASSIFY_PRIVATE_IP,
 160     NO_CLASSIFY_OFF_THE_RECORD,
 161     NO_CLASSIFY_MATCH_CSD_WHITELIST,
 162     NO_CLASSIFY_TOO_MANY_REPORTS,
 163     NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
 164     NO_CLASSIFY_NO_DATABASE_MANAGER,
 165     NO_CLASSIFY_KILLSWITCH,
 166     NO_CLASSIFY_CANCEL,
 167     NO_CLASSIFY_RESULT_FROM_CACHE,
 168     NO_CLASSIFY_NOT_HTTP_URL,
 169
 170     NO_CLASSIFY_MAX  // Always add new values before this one.
 171   };
 172
 173   // The destructor can be called either from the UI or the IO thread.
 174   virtual ~ShouldClassifyUrlRequest() { }
 175
 176   bool ShouldClassifyForPhishing() const {
 177     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 178     return !start_phishing_classification_cb_.is_null();
 179   }
 180
 181   bool ShouldClassifyForMalware() const {
 182     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 183     return !start_malware_classification_cb_.is_null();
 184   }
 185
 186   void DontClassifyForPhishing(PreClassificationCheckFailures reason) {
 187     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 188     if (ShouldClassifyForPhishing()) {
 189       // Track the first reason why we stopped classifying for phishing.
 190       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
 191                                 reason, NO_CLASSIFY_MAX);
 192       DVLOG(2) << "Failed phishing pre-classification checks.  Reason: "
 193                << reason;
 194       start_phishing_classification_cb_.Run(false);
 195     }
 196     start_phishing_classification_cb_.Reset();
 197   }
 198
 199   void DontClassifyForMalware(PreClassificationCheckFailures reason) {
 200     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 201     if (ShouldClassifyForMalware()) {
 202       // Track the first reason why we stopped classifying for malware.
 203       UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",
 204                                 reason, NO_CLASSIFY_MAX);
 205       DVLOG(2) << "Failed malware pre-classification checks.  Reason: "
 206                << reason;
 207       start_malware_classification_cb_.Run(false);
 208     }
 209     start_malware_classification_cb_.Reset();
 210   }
 211
 212   void CheckSafeBrowsingDatabase(const GURL& url) {
 213     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 214     // We don't want to call the classification callbacks from the IO
 215     // thread so we simply pass the results of this method to CheckCache()
 216     // which is called on the UI thread;
 217     PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;
 218     PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;
 219     if (!database_manager_.get()) {
 220       // We cannot check the Safe Browsing whitelists so we stop here
 221       // for safety.
 222       malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;
 223     } else {
 224       if (database_manager_->MatchCsdWhitelistUrl(url)) {
 225         DVLOG(1) << "Skipping phishing classification for URL: " << url
 226                  << " because it matches the csd whitelist";
 227         phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;
 228       }
 229       if (database_manager_->IsMalwareKillSwitchOn()) {
 230         malware_reason = NO_CLASSIFY_KILLSWITCH;
 231       }
 232     }
 233     BrowserThread::PostTask(
 234         BrowserThread::UI,
 235         FROM_HERE,
 236         base::Bind(&ShouldClassifyUrlRequest::CheckCache,
 237                    this,
 238                    phishing_reason,
 239                    malware_reason));
 240   }
 241
 242   void CheckCache(PreClassificationCheckFailures phishing_reason,
 243                   PreClassificationCheckFailures malware_reason) {
 244     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 245     if (phishing_reason != NO_CLASSIFY_MAX)
 246       DontClassifyForPhishing(phishing_reason);
 247     if (malware_reason != NO_CLASSIFY_MAX)
 248       DontClassifyForMalware(malware_reason);
 249     if (!ShouldClassifyForMalware() && !ShouldClassifyForPhishing()) {
 250       return;  // No point in doing anything else.
 251     }
 252     // If result is cached, we don't want to run classification again.
 253     // In that case we're just trying to show the warning.
 254     bool is_phishing;
 255     if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
 256       DVLOG(1) << "Satisfying request for " << params_.url << " from cache";
 257       UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.RequestSatisfiedFromCache", 1);
 258       // Since we are already on the UI thread, this is safe.
 259       host_->MaybeShowPhishingWarning(params_.url, is_phishing);
 260       DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);
 261     }
 262
 263     // We want to limit the number of requests, though we will ignore the
 264     // limit for urls in the cache.  We don't want to start classifying
 265     // too many pages as phishing, but for those that we already think are
 266     // phishing we want to send a request to the server to give ourselves
 267     // a chance to fix misclassifications.
 268     if (csd_service_->IsInCache(params_.url)) {
 269       DVLOG(1) << "Reporting limit skipped for " << params_.url
 270                << " as it was in the cache.";
 271       UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ReportLimitSkipped", 1);
 272     } else if (csd_service_->OverPhishingReportLimit()) {
 273       DVLOG(1) << "Too many report phishing requests sent recently, "
 274                << "not running classification for " << params_.url;
 275       DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);
 276     }
 277     if (csd_service_->OverMalwareReportLimit()) {
 278       DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);
 279     }
 280
 281     // Everything checks out, so start classification.
 282     // |web_contents_| is safe to call as we will be destructed
 283     // before it is.
 284     if (ShouldClassifyForPhishing()) {
 285       start_phishing_classification_cb_.Run(true);
 286       // Reset the callback to make sure ShouldClassifyForPhishing()
 287       // returns false.
 288       start_phishing_classification_cb_.Reset();
 289     }
 290     if (ShouldClassifyForMalware()) {
 291       start_malware_classification_cb_.Run(true);
 292       // Reset the callback to make sure ShouldClassifyForMalware()
 293       // returns false.
 294       start_malware_classification_cb_.Reset();
 295     }
 296   }
 297
 298   content::FrameNavigateParams params_;
 299   WebContents* web_contents_;
 300   ClientSideDetectionService* csd_service_;
 301   // We keep a ref pointer here just to make sure the safe browsing
 302   // database manager stays alive long enough.
 303   scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
 304   ClientSideDetectionHost* host_;
 305
 306   ShouldClassifyUrlCallback start_phishing_classification_cb_;
 307   ShouldClassifyUrlCallback start_malware_classification_cb_;
 308
 309   DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
 310 };
 311
 312 // static
 313 ClientSideDetectionHost* ClientSideDetectionHost::Create(
 314     WebContents* tab) {
 315   return new ClientSideDetectionHost(tab);
 316 }
 317
 318 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
 319     : content::WebContentsObserver(tab),
 320       csd_service_(NULL),
 321       classification_request_(NULL),
 322       should_extract_malware_features_(true),
 323       should_classify_for_malware_(false),
 324       pageload_complete_(false),
 325       unsafe_unique_page_id_(-1),
 326       weak_factory_(this) {
 327   DCHECK(tab);
 328   // Note: csd_service_ and sb_service will be NULL here in testing.
 329   csd_service_ = g_browser_process->safe_browsing_detection_service();
 330   feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));
 331   registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,
 332                  content::Source<WebContents>(tab));
 333
 334   scoped_refptr<SafeBrowsingService> sb_service =
 335       g_browser_process->safe_browsing_service();
 336   if (sb_service.get()) {
 337     ui_manager_ = sb_service->ui_manager();
 338     database_manager_ = sb_service->database_manager();
 339     ui_manager_->AddObserver(this);
 340   }
 341 }
 342
 343 ClientSideDetectionHost::~ClientSideDetectionHost() {
 344   if (ui_manager_.get())
 345     ui_manager_->RemoveObserver(this);
 346 }
 347
 348 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
 349   bool handled = true;
 350   IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
 351     IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
 352                         OnPhishingDetectionDone)
 353     IPC_MESSAGE_UNHANDLED(handled = false)
 354   IPC_END_MESSAGE_MAP()
 355   return handled;
 356 }
 357
 358 void ClientSideDetectionHost::DidNavigateMainFrame(
 359     const content::LoadCommittedDetails& details,
 360     const content::FrameNavigateParams& params) {
 361   // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
 362   // that don't call this method on the UI thread.
 363   // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 364   if (details.is_in_page) {
 365     // If the navigation is within the same page, the user isn't really
 366     // navigating away.  We don't need to cancel a pending callback or
 367     // begin a new classification.
 368     return;
 369   }
 370   // Cancel any pending classification request.
 371   if (classification_request_.get()) {
 372     classification_request_->Cancel();
 373   }
 374   // If we navigate away and there currently is a pending phishing
 375   // report request we have to cancel it to make sure we don't display
 376   // an interstitial for the wrong page.  Note that this won't cancel
 377   // the server ping back but only cancel the showing of the
 378   // interstial.
 379   weak_factory_.InvalidateWeakPtrs();
 380
 381   if (!csd_service_) {
 382     return;
 383   }
 384   browse_info_.reset(new BrowseInfo);
 385
 386   // Store redirect chain information.
 387   if (params.url.host() != cur_host_) {
 388     cur_host_ = params.url.host();
 389     cur_host_redirects_ = params.redirects;
 390   }
 391   browse_info_->url = params.url;
 392   browse_info_->host_redirects = cur_host_redirects_;
 393   browse_info_->url_redirects = params.redirects;
 394   browse_info_->referrer = params.referrer.url;
 395   browse_info_->http_status_code = details.http_status_code;
 396
 397   should_extract_malware_features_ = true;
 398   should_classify_for_malware_ = false;
 399   pageload_complete_ = false;
 400
 401   // Check whether we can cassify the current URL for phishing or malware.
 402   classification_request_ = new ShouldClassifyUrlRequest(
 403       params,
 404       base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,
 405                  weak_factory_.GetWeakPtr()),
 406       base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,
 407                  weak_factory_.GetWeakPtr()),
 408       web_contents(), csd_service_, database_manager_.get(), this);
 409   classification_request_->Start();
 410 }
 411
 412 void ClientSideDetectionHost::OnSafeBrowsingHit(
 413     const SafeBrowsingUIManager::UnsafeResource& resource) {
 414   if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
 415     return;
 416
 417   // Check that the hit is either malware or phishing.
 418   if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&
 419       resource.threat_type != SB_THREAT_TYPE_URL_MALWARE)
 420     return;
 421
 422   // Check that this notification is really for us.
 423   content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
 424       resource.render_process_host_id, resource.render_view_id);
 425   if (!hit_rvh ||
 426       web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
 427     return;
 428
 429   // Store the unique page ID for later.
 430   unsafe_unique_page_id_ =
 431       web_contents()->GetController().GetActiveEntry()->GetUniqueID();
 432
 433   // We also keep the resource around in order to be able to send the
 434   // malicious URL to the server.
 435   unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource));
 436   unsafe_resource_->callback.Reset();  // Don't do anything stupid.
 437 }
 438
 439 void ClientSideDetectionHost::OnSafeBrowsingMatch(
 440     const SafeBrowsingUIManager::UnsafeResource& resource) {
 441   if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
 442     return;
 443
 444   // Check that this notification is really for us.
 445   content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
 446       resource.render_process_host_id, resource.render_view_id);
 447   if (!hit_rvh ||
 448       web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
 449     return;
 450
 451   web_contents()->GetController().GetActiveEntry()->SetExtraData(
 452       kSafeBrowsingMatchKey, base::ASCIIToUTF16("1"));
 453 }
 454
 455 scoped_refptr<SafeBrowsingDatabaseManager>
 456 ClientSideDetectionHost::database_manager() {
 457   return database_manager_;
 458 }
 459
 460 bool ClientSideDetectionHost::DidPageReceiveSafeBrowsingMatch() const {
 461   if (!web_contents() || !web_contents()->GetController().GetVisibleEntry())
 462     return false;
 463
 464   // If an interstitial page is showing, GetVisibleEntry will return the
 465   // transient NavigationEntry for the interstitial. The transient entry
 466   // will not have the flag set, so use the pending entry instead if there
 467   // is one.
 468   NavigationEntry* entry = web_contents()->GetController().GetPendingEntry();
 469   if (!entry) {
 470     entry = web_contents()->GetController().GetVisibleEntry();
 471     if (entry->GetPageType() == content::PAGE_TYPE_INTERSTITIAL)
 472       entry = web_contents()->GetController().GetLastCommittedEntry();
 473     if (!entry)
 474       return false;
 475   }
 476
 477   base::string16 value;
 478   return entry->GetExtraData(kSafeBrowsingMatchKey, &value);
 479 }
 480
 481 void ClientSideDetectionHost::WebContentsDestroyed() {
 482   // Tell any pending classification request that it is being canceled.
 483   if (classification_request_.get()) {
 484     classification_request_->Cancel();
 485   }
 486   // Cancel all pending feature extractions.
 487   feature_extractor_.reset();
 488 }
 489
 490 void ClientSideDetectionHost::OnPhishingPreClassificationDone(
 491     bool should_classify) {
 492   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 493   if (browse_info_.get() && should_classify) {
 494     DVLOG(1) << "Instruct renderer to start phishing detection for URL: "
 495              << browse_info_->url;
 496     content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();
 497     rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
 498         rvh->GetRoutingID(), browse_info_->url));
 499   }
 500 }
 501
 502 void ClientSideDetectionHost::OnMalwarePreClassificationDone(
 503     bool should_classify) {
 504   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 505   // If classification checks failed we should stop extracting malware features.
 506   DVLOG(2) << "Malware pre-classification checks done. Should classify: "
 507            << should_classify;
 508   should_extract_malware_features_ = should_classify;
 509   should_classify_for_malware_ = should_classify;
 510   MaybeStartMalwareFeatureExtraction();
 511 }
 512
 513 void ClientSideDetectionHost::DidStopLoading() {
 514   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 515   if (!csd_service_ || !browse_info_.get())
 516     return;
 517   DVLOG(2) << "Page finished loading.";
 518   pageload_complete_ = true;
 519   MaybeStartMalwareFeatureExtraction();
 520 }
 521
 522 void ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {
 523   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 524   if (csd_service_ && browse_info_.get() &&
 525       should_classify_for_malware_ &&
 526       pageload_complete_) {
 527     scoped_ptr<ClientMalwareRequest> malware_request(
 528         new ClientMalwareRequest);
 529     // Start browser-side malware feature extraction.  Once we're done it will
 530     // send the malware client verdict request.
 531     malware_request->set_url(browse_info_->url.spec());
 532     const GURL& referrer = browse_info_->referrer;
 533     if (referrer.SchemeIs("http")) {  // Only send http urls.
 534       malware_request->set_referrer_url(referrer.spec());
 535     }
 536     // This function doesn't expect browse_info_ to stay around after this
 537     // function returns.
 538     feature_extractor_->ExtractMalwareFeatures(
 539         browse_info_.get(),
 540         malware_request.release(),
 541         base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,
 542                    weak_factory_.GetWeakPtr()));
 543     should_classify_for_malware_ = false;
 544   }
 545 }
 546
 547 void ClientSideDetectionHost::OnPhishingDetectionDone(
 548     const std::string& verdict_str) {
 549   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 550   // There is something seriously wrong if there is no service class but
 551   // this method is called.  The renderer should not start phishing detection
 552   // if there isn't any service class in the browser.
 553   DCHECK(csd_service_);
 554   DCHECK(browse_info_.get());
 555
 556   // We parse the protocol buffer here.  If we're unable to parse it we won't
 557   // send the verdict further.
 558   scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
 559   if (csd_service_ &&
 560       browse_info_.get() &&
 561       verdict->ParseFromString(verdict_str) &&
 562       verdict->IsInitialized()) {
 563     // We only send phishing verdict to the server if the verdict is phishing or
 564     // if a SafeBrowsing interstitial was already shown for this site.  E.g., a
 565     // malware or phishing interstitial was shown but the user clicked
 566     // through.
 567     if (verdict->is_phishing() || DidShowSBInterstitial()) {
 568       if (DidShowSBInterstitial()) {
 569         browse_info_->unsafe_resource.reset(unsafe_resource_.release());
 570       }
 571       // Start browser-side feature extraction.  Once we're done it will send
 572       // the client verdict request.
 573       feature_extractor_->ExtractFeatures(
 574           browse_info_.get(),
 575           verdict.release(),
 576           base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,
 577                      weak_factory_.GetWeakPtr()));
 578     }
 579   }
 580 }
 581
 582 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
 583                                                        bool is_phishing) {
 584   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 585   DVLOG(2) << "Received server phishing verdict for URL:" << phishing_url
 586            << " is_phishing:" << is_phishing;
 587   if (is_phishing) {
 588     DCHECK(web_contents());
 589     if (ui_manager_.get()) {
 590       SafeBrowsingUIManager::UnsafeResource resource;
 591       resource.url = phishing_url;
 592       resource.original_url = phishing_url;
 593       resource.is_subresource = false;
 594       resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL;
 595       resource.render_process_host_id =
 596           web_contents()->GetRenderProcessHost()->GetID();
 597       resource.render_view_id =
 598           web_contents()->GetRenderViewHost()->GetRoutingID();
 599       if (!ui_manager_->IsWhitelisted(resource)) {
 600         // We need to stop any pending navigations, otherwise the interstital
 601         // might not get created properly.
 602         web_contents()->GetController().DiscardNonCommittedEntries();
 603       }
 604       ui_manager_->DisplayBlockingPage(resource);
 605     }
 606     // If there is true phishing verdict, invalidate weakptr so that no longer
 607     // consider the malware vedict.
 608     weak_factory_.InvalidateWeakPtrs();
 609   }
 610 }
 611
 612 void ClientSideDetectionHost::MaybeShowMalwareWarning(GURL original_url,
 613                                                       GURL malware_url,
 614                                                       bool is_malware) {
 615   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 616   DVLOG(2) << "Received server malawre IP verdict for URL:" << malware_url
 617            << " is_malware:" << is_malware;
 618   if (is_malware && malware_url.is_valid() && original_url.is_valid()) {
 619     DCHECK(web_contents());
 620     if (ui_manager_.get()) {
 621       SafeBrowsingUIManager::UnsafeResource resource;
 622       resource.url = malware_url;
 623       resource.original_url = original_url;
 624       resource.is_subresource = (malware_url.host() != original_url.host());
 625       resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL;
 626       resource.render_process_host_id =
 627           web_contents()->GetRenderProcessHost()->GetID();
 628       resource.render_view_id =
 629           web_contents()->GetRenderViewHost()->GetRoutingID();
 630       if (!ui_manager_->IsWhitelisted(resource)) {
 631         // We need to stop any pending navigations, otherwise the interstital
 632         // might not get created properly.
 633         web_contents()->GetController().DiscardNonCommittedEntries();
 634       }
 635       ui_manager_->DisplayBlockingPage(resource);
 636     }
 637     // If there is true malware verdict, invalidate weakptr so that no longer
 638     // consider the phishing vedict.
 639     weak_factory_.InvalidateWeakPtrs();
 640   }
 641 }
 642
 643 void ClientSideDetectionHost::FeatureExtractionDone(
 644     bool success,
 645     scoped_ptr<ClientPhishingRequest> request) {
 646   DCHECK(request);
 647   DVLOG(2) << "Feature extraction done (success:" << success << ") for URL: "
 648            << request->url() << ". Start sending client phishing request.";
 649   ClientSideDetectionService::ClientReportPhishingRequestCallback callback;
 650   // If the client-side verdict isn't phishing we don't care about the server
 651   // response because we aren't going to display a warning.
 652   if (request->is_phishing()) {
 653     callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning,
 654                           weak_factory_.GetWeakPtr());
 655   }
 656   // Send ping even if the browser feature extraction failed.
 657   csd_service_->SendClientReportPhishingRequest(
 658       request.release(),  // The service takes ownership of the request object.
 659       callback);
 660 }
 661
 662 void ClientSideDetectionHost::MalwareFeatureExtractionDone(
 663     bool feature_extraction_success,
 664     scoped_ptr<ClientMalwareRequest> request) {
 665   DCHECK(request.get());
 666   DVLOG(2) << "Malware Feature extraction done for URL: " << request->url()
 667            << ", with badip url count:" << request->bad_ip_url_info_size();
 668
 669   // Send ping if there is matching features.
 670   if (feature_extraction_success && request->bad_ip_url_info_size() > 0) {
 671     DVLOG(1) << "Start sending client malware request.";
 672     ClientSideDetectionService::ClientReportMalwareRequestCallback callback;
 673     callback = base::Bind(&ClientSideDetectionHost::MaybeShowMalwareWarning,
 674                           weak_factory_.GetWeakPtr());
 675     csd_service_->SendClientReportMalwareRequest(request.release(), callback);
 676   }
 677 }
 678
 679 void ClientSideDetectionHost::UpdateIPUrlMap(const std::string& ip,
 680                                              const std::string& url,
 681                                              const std::string& method,
 682                                              const std::string& referrer,
 683                                              const ResourceType resource_type) {
 684   if (ip.empty() || url.empty())
 685     return;
 686
 687   IPUrlMap::iterator it = browse_info_->ips.find(ip);
 688   if (it == browse_info_->ips.end()) {
 689     if (browse_info_->ips.size() < kMaxIPsPerBrowse) {
 690       std::vector<IPUrlInfo> url_infos;
 691       url_infos.push_back(IPUrlInfo(url, method, referrer, resource_type));
 692       browse_info_->ips.insert(make_pair(ip, url_infos));
 693     }
 694   } else if (it->second.size() < kMaxUrlsPerIP) {
 695     it->second.push_back(IPUrlInfo(url, method, referrer, resource_type));
 696   }
 697 }
 698
 699 void ClientSideDetectionHost::Observe(
 700     int type,
 701     const content::NotificationSource& source,
 702     const content::NotificationDetails& details) {
 703   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
 704   DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);
 705   const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(
 706       details).ptr();
 707   if (req && browse_info_.get() &&
 708       should_extract_malware_features_ && req->url.is_valid()) {
 709     UpdateIPUrlMap(req->socket_address.host() /* ip */,
 710                    req->url.spec()  /* url */,
 711                    req->method,
 712                    req->referrer,
 713                    req->resource_type);
 714   }
 715 }
 716
 717 bool ClientSideDetectionHost::DidShowSBInterstitial() const {
 718   if (unsafe_unique_page_id_ <= 0 || !web_contents()) {
 719     return false;
 720   }
 721   const NavigationEntry* nav_entry =
 722       web_contents()->GetController().GetActiveEntry();
 723   return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);
 724 }
 725
 726 void ClientSideDetectionHost::set_client_side_detection_service(
 727     ClientSideDetectionService* service) {
 728   csd_service_ = service;
 729 }
 730
 731 void ClientSideDetectionHost::set_safe_browsing_managers(
 732     SafeBrowsingUIManager* ui_manager,
 733     SafeBrowsingDatabaseManager* database_manager) {
 734   if (ui_manager_.get())
 735     ui_manager_->RemoveObserver(this);
 736
 737   ui_manager_ = ui_manager;
 738   if (ui_manager)
 739     ui_manager_->AddObserver(this);
 740
 741   database_manager_ = database_manager;
 742 }
 743
 744 }  // namespace safe_browsing