chrome/renderer/safe_browsing/phishing_classifier_delegate.h

   1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4 //
   5 // This class is used by the RenderView to interact with a PhishingClassifier.
   6
   7 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_DELEGATE_H_
   8 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_DELEGATE_H_
   9
  10 #include "base/memory/scoped_ptr.h"
  11 #include "base/strings/string16.h"
  12 #include "content/public/renderer/render_process_observer.h"
  13 #include "content/public/renderer/render_view_observer.h"
  14 #include "ui/base/page_transition_types.h"
  15 #include "url/gurl.h"
  16
  17 namespace safe_browsing {
  18 class ClientPhishingRequest;
  19 class PhishingClassifier;
  20 class Scorer;
  21
  22 class PhishingClassifierFilter : public content::RenderProcessObserver {
       // A content::RenderProcessObserver that receives control messages through
       // OnControlMessageReceived().
       // NOTE(review): presumably this routes the phishing-model message to
       // OnSetPhishingModel(); the dispatch lives in the .cc -- confirm there.
  23  public:
       // Factory function.  The constructor is private, so this is how outside
       // code obtains an instance.
       // NOTE(review): ownership of the returned pointer is not visible in this
       // header -- confirm against the .cc and the callers.
  24   static PhishingClassifierFilter* Create();
  25   ~PhishingClassifierFilter() override;
  26
       // content::RenderProcessObserver implementation.
  27   bool OnControlMessageReceived(const IPC::Message& message) override;
  28
  29  private:
       // Private so that instances are only created through Create().
  30   PhishingClassifierFilter();

       // Receives the phishing model as a string.
       // NOTE(review): |model| is presumably a serialized model delivered by an
       // IPC from the browser -- confirm the format in the .cc.
  31   void OnSetPhishingModel(const std::string& model);
  32
  33   DISALLOW_COPY_AND_ASSIGN(PhishingClassifierFilter);
  34 };
  35
  36 class PhishingClassifierDelegate : public content::RenderViewObserver {
  37  public:
  38   // The RenderView owns us.  This object takes ownership of the classifier.
  39   // Note that if classifier is null, a default instance of PhishingClassifier
  40   // will be used.
  41   static PhishingClassifierDelegate* Create(content::RenderView* render_view,
  42                                             PhishingClassifier* classifier);
  43   ~PhishingClassifierDelegate() override;
  44
  45   // Called by the RenderView once there is a phishing scorer available.
  46   // The scorer is passed on to the classifier.
  47   void SetPhishingScorer(const safe_browsing::Scorer* scorer);
  48
  49   // Called by the RenderView once a page has finished loading.  Updates the
  50   // last-loaded URL and page text, then starts classification if all other
  51   // conditions are met (see MaybeStartClassification for details).
  52   // We ignore preliminary captures, since these happen before the page has
  53   // finished loading.
       // NOTE(review): |page_text| is a non-const pointer; whether its contents
       // are consumed or left intact is not visible in this header -- confirm in
       // the .cc.
  54   void PageCaptured(base::string16* page_text, bool preliminary_capture);
  55
  56   // RenderViewObserver implementation, public for testing.
  57
  58   // Called by the RenderView when a page has started loading in the given
  59   // WebFrame.  Typically, this will cause any pending classification to be
  60   // cancelled.  However, if the navigation is within the same page, we
  61   // continue running the current classification.
  62   void DidCommitProvisionalLoad(blink::WebLocalFrame* frame,
  63                                 bool is_new_navigation) override;
  64
  65  private:
       // Gives the test fixture access to the private members below.
  66   friend class PhishingClassifierDelegateTest;
  67
       // Private; instances are obtained through Create(), which documents the
       // ownership of |classifier|.
  68   PhishingClassifierDelegate(content::RenderView* render_view,
  69                              PhishingClassifier* classifier);
  70
       // The reason handed to CancelPendingClassification().
  71   enum CancelClassificationReason {
  72     NAVIGATE_AWAY,
  73     NAVIGATE_WITHIN_PAGE,
  74     PAGE_RECAPTURED,
  75     SHUTDOWN,
  76     NEW_PHISHING_SCORER,
  77     CANCEL_CLASSIFICATION_MAX  // Always add new values before this one.
  78   };
  79
  80   // Cancels any pending classification and frees the page text.
  81   void CancelPendingClassification(CancelClassificationReason reason);
  82
  83   // RenderViewObserver implementation.
  84   bool OnMessageReceived(const IPC::Message& message) override;
  85
  86   // Called by the RenderView when it receives a StartPhishingDetection IPC
  87   // from the browser.  This signals that it is ok to begin classification
  88   // for the given toplevel URL.  If the URL has been fully loaded into the
  89   // RenderView and a Scorer has been set, this will begin classification,
  90   // otherwise classification will be deferred until these conditions are met.
  91   void OnStartPhishingDetection(const GURL& url);
  92
  93   // Called when classification for the current page finishes.
  94   void ClassificationDone(const ClientPhishingRequest& verdict);
  95
  96   // Returns the RenderView's toplevel URL.
  97   GURL GetToplevelUrl();
  98
  99   // Shared code to begin classification if all conditions are met.
 100   void MaybeStartClassification();
 101
 102   // The PhishingClassifier to use for the RenderView.  Owned by this object
 103   // (see Create()); SetPhishingScorer() passes the scorer on to it.
       // NOTE(review): Create() documents the classifier as supplied (or
       // defaulted) at construction; confirm in the .cc that it is not created
       // lazily when a scorer arrives.
 104   scoped_ptr<PhishingClassifier> classifier_;
 105
 106   // The last URL that the browser instructed us to classify,
 107   // with the ref stripped.
 108   GURL last_url_received_from_browser_;
 109
 110   // The last top-level URL that has finished loading in the RenderView.
 111   // This corresponds to the text in classifier_page_text_.
 112   GURL last_finished_load_url_;
 113
 114   // The transition type for the last load in the main frame.  We use this
 115   // to exclude back/forward loads from classification.  Note that this is
 116   // set in DidCommitProvisionalLoad(); the transition is reset after this
 117   // call in the RenderView, so we need to save off the value.
 118   ui::PageTransition last_main_frame_transition_;
 119
 120   // The URL of the last load that we actually started classification on.
 121   // This is used to suppress phishing classification on subframe navigation
 122   // and back and forward navigations in history.
 123   GURL last_url_sent_to_classifier_;
 124
 125   // The page text that will be analyzed by the phishing classifier.  This is
 126   // set by PageCaptured() and cleared when the classifier finishes.  Note that
 127   // if there is no Scorer yet when PageCaptured() is called, or the browser has
 128   // not instructed us to classify the page, the page text will be cached until
 129   // these conditions are met.
 130   base::string16 classifier_page_text_;
 131
 132   // Tracks whether we have stored anything in classifier_page_text_ for the
 133   // most recent load.  We use this to distinguish empty text from cases where
 134   // PageCaptured has not been called.
 135   bool have_page_text_;
 136
 137   // Set to true if the classifier is currently running.
 138   bool is_classifying_;
 139
 140   DISALLOW_COPY_AND_ASSIGN(PhishingClassifierDelegate);
 141 };
 142
 143 }  // namespace safe_browsing
 144
 145 #endif  // CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_DELEGATE_H_