Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / chrome / renderer / safe_browsing / phishing_classifier_delegate.cc
blob4993280f416b3f2364a106a9e0a9d5d3d38d3a58
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"
7 #include <set>
9 #include "base/bind.h"
10 #include "base/callback.h"
11 #include "base/lazy_instance.h"
12 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
14 #include "chrome/common/safe_browsing/csd.pb.h"
15 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
16 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
17 #include "chrome/renderer/safe_browsing/phishing_classifier.h"
18 #include "chrome/renderer/safe_browsing/scorer.h"
19 #include "content/public/renderer/document_state.h"
20 #include "content/public/renderer/navigation_state.h"
21 #include "content/public/renderer/render_thread.h"
22 #include "content/public/renderer/render_view.h"
23 #include "third_party/WebKit/public/platform/WebURL.h"
24 #include "third_party/WebKit/public/web/WebDocument.h"
25 #include "third_party/WebKit/public/web/WebLocalFrame.h"
26 #include "third_party/WebKit/public/web/WebView.h"
28 using content::DocumentState;
29 using content::NavigationState;
30 using content::RenderThread;
32 namespace safe_browsing {
34 static GURL StripRef(const GURL& url) {
35 GURL::Replacements replacements;
36 replacements.ClearRef();
37 return url.ReplaceComponents(replacements);
40 typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates;
41 static base::LazyInstance<PhishingClassifierDelegates>
42 g_delegates = LAZY_INSTANCE_INITIALIZER;
44 static base::LazyInstance<scoped_ptr<const safe_browsing::Scorer> >
45 g_phishing_scorer = LAZY_INSTANCE_INITIALIZER;
47 // static
48 PhishingClassifierFilter* PhishingClassifierFilter::Create() {
49 // Private constructor and public static Create() method to facilitate
50 // stubbing out this class for binary-size reduction purposes.
51 return new PhishingClassifierFilter();
54 PhishingClassifierFilter::PhishingClassifierFilter()
55 : RenderProcessObserver() {}
57 PhishingClassifierFilter::~PhishingClassifierFilter() {}
59 bool PhishingClassifierFilter::OnControlMessageReceived(
60 const IPC::Message& message) {
61 bool handled = true;
62 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierFilter, message)
63 IPC_MESSAGE_HANDLER(SafeBrowsingMsg_SetPhishingModel, OnSetPhishingModel)
64 IPC_MESSAGE_UNHANDLED(handled = false)
65 IPC_END_MESSAGE_MAP()
66 return handled;
69 void PhishingClassifierFilter::OnSetPhishingModel(const std::string& model) {
70 safe_browsing::Scorer* scorer = NULL;
71 // An empty model string means we should disable client-side phishing
72 // detection.
73 if (!model.empty()) {
74 scorer = safe_browsing::Scorer::Create(model);
75 if (!scorer) {
76 DLOG(ERROR) << "Unable to create a PhishingScorer - corrupt model?";
77 return;
80 PhishingClassifierDelegates::iterator i;
81 for (i = g_delegates.Get().begin(); i != g_delegates.Get().end(); ++i) {
82 (*i)->SetPhishingScorer(scorer);
84 g_phishing_scorer.Get().reset(scorer);
87 // static
88 PhishingClassifierDelegate* PhishingClassifierDelegate::Create(
89 content::RenderView* render_view, PhishingClassifier* classifier) {
90 // Private constructor and public static Create() method to facilitate
91 // stubbing out this class for binary-size reduction purposes.
92 return new PhishingClassifierDelegate(render_view, classifier);
95 PhishingClassifierDelegate::PhishingClassifierDelegate(
96 content::RenderView* render_view,
97 PhishingClassifier* classifier)
98 : content::RenderViewObserver(render_view),
99 last_main_frame_transition_(ui::PAGE_TRANSITION_LINK),
100 have_page_text_(false),
101 is_classifying_(false) {
102 g_delegates.Get().insert(this);
103 if (!classifier) {
104 classifier = new PhishingClassifier(render_view,
105 new FeatureExtractorClock());
108 classifier_.reset(classifier);
110 if (g_phishing_scorer.Get().get())
111 SetPhishingScorer(g_phishing_scorer.Get().get());
114 PhishingClassifierDelegate::~PhishingClassifierDelegate() {
115 CancelPendingClassification(SHUTDOWN);
116 g_delegates.Get().erase(this);
119 void PhishingClassifierDelegate::SetPhishingScorer(
120 const safe_browsing::Scorer* scorer) {
121 if (!render_view()->GetWebView())
122 return; // RenderView is tearing down.
123 if (is_classifying_) {
124 // If there is a classification going on right now it means we're
125 // actually replacing an existing scorer with a new model. In
126 // this case we simply cancel the current classification.
127 // TODO(noelutz): if this happens too frequently we could also
128 // replace the old scorer with the new one once classification is done
129 // but this would complicate the code somewhat.
130 CancelPendingClassification(NEW_PHISHING_SCORER);
132 classifier_->set_phishing_scorer(scorer);
133 // Start classifying the current page if all conditions are met.
134 // See MaybeStartClassification() for details.
135 MaybeStartClassification();
138 void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) {
139 last_url_received_from_browser_ = StripRef(url);
140 // Start classifying the current page if all conditions are met.
141 // See MaybeStartClassification() for details.
142 MaybeStartClassification();
145 void PhishingClassifierDelegate::DidCommitProvisionalLoad(
146 blink::WebLocalFrame* frame, bool is_new_navigation) {
147 // A new page is starting to load, so cancel classificaiton.
149 // TODO(bryner): We shouldn't need to cancel classification if the navigation
150 // is within the same page. However, if we let classification continue in
151 // this case, we need to properly deal with the fact that PageCaptured will
152 // be called again for the in-page navigation. We need to be sure not to
153 // swap out the page text while the term feature extractor is still running.
154 DocumentState* document_state = DocumentState::FromDataSource(
155 frame->dataSource());
156 NavigationState* navigation_state = document_state->navigation_state();
157 CancelPendingClassification(navigation_state->WasWithinSamePage()
158 ? NAVIGATE_WITHIN_PAGE
159 : NAVIGATE_AWAY);
160 if (frame == render_view()->GetWebView()->mainFrame()) {
161 last_main_frame_transition_ = navigation_state->GetTransitionType();
165 void PhishingClassifierDelegate::PageCaptured(base::string16* page_text,
166 bool preliminary_capture) {
167 if (preliminary_capture) {
168 return;
170 // Make sure there's no classification in progress. We don't want to swap
171 // out the page text string from underneath the term feature extractor.
173 // Note: Currently, if the url hasn't changed, we won't restart
174 // classification in this case. We may want to adjust this.
175 CancelPendingClassification(PAGE_RECAPTURED);
176 last_finished_load_url_ = GetToplevelUrl();
177 classifier_page_text_.swap(*page_text);
178 have_page_text_ = true;
179 MaybeStartClassification();
182 void PhishingClassifierDelegate::CancelPendingClassification(
183 CancelClassificationReason reason) {
184 if (is_classifying_) {
185 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.CancelClassificationReason",
186 reason,
187 CANCEL_CLASSIFICATION_MAX);
188 is_classifying_ = false;
190 if (classifier_->is_ready()) {
191 classifier_->CancelPendingClassification();
193 classifier_page_text_.clear();
194 have_page_text_ = false;
197 bool PhishingClassifierDelegate::OnMessageReceived(
198 const IPC::Message& message) {
199 bool handled = true;
200 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message)
201 IPC_MESSAGE_HANDLER(SafeBrowsingMsg_StartPhishingDetection,
202 OnStartPhishingDetection)
203 IPC_MESSAGE_UNHANDLED(handled = false)
204 IPC_END_MESSAGE_MAP()
205 return handled;
208 void PhishingClassifierDelegate::ClassificationDone(
209 const ClientPhishingRequest& verdict) {
210 // We no longer need the page text.
211 classifier_page_text_.clear();
212 DVLOG(2) << "Phishy verdict = " << verdict.is_phishing()
213 << " score = " << verdict.client_score();
214 if (verdict.client_score() != PhishingClassifier::kInvalidScore) {
215 DCHECK_EQ(last_url_sent_to_classifier_.spec(), verdict.url());
216 RenderThread::Get()->Send(new SafeBrowsingHostMsg_PhishingDetectionDone(
217 routing_id(), verdict.SerializeAsString()));
221 GURL PhishingClassifierDelegate::GetToplevelUrl() {
222 return render_view()->GetWebView()->mainFrame()->document().url();
225 void PhishingClassifierDelegate::MaybeStartClassification() {
226 // We can begin phishing classification when the following conditions are
227 // met:
228 // 1. A Scorer has been created
229 // 2. The browser has sent a StartPhishingDetection message for the current
230 // toplevel URL.
231 // 3. The page has finished loading and the page text has been extracted.
232 // 4. The load is a new navigation (not a session history navigation).
233 // 5. The toplevel URL has not already been classified.
235 // Note that if we determine that this particular navigation should not be
236 // classified at all (as opposed to deferring it until we get an IPC or the
237 // load completes), we discard the page text since it won't be needed.
238 if (!classifier_->is_ready()) {
239 DVLOG(2) << "Not starting classification, no Scorer created.";
240 // Keep classifier_page_text_, in case a Scorer is set later.
241 return;
244 if (last_main_frame_transition_ & ui::PAGE_TRANSITION_FORWARD_BACK) {
245 // Skip loads from session history navigation. However, update the
246 // last URL sent to the classifier, so that we'll properly detect
247 // in-page navigations.
248 DVLOG(2) << "Not starting classification for back/forward navigation";
249 last_url_sent_to_classifier_ = last_finished_load_url_;
250 classifier_page_text_.clear(); // we won't need this.
251 have_page_text_ = false;
252 return;
255 GURL stripped_last_load_url(StripRef(last_finished_load_url_));
256 if (stripped_last_load_url == StripRef(last_url_sent_to_classifier_)) {
257 // We've already classified this toplevel URL, so this was likely an
258 // in-page navigation or a subframe navigation. The browser should not
259 // send a StartPhishingDetection IPC in this case.
260 DVLOG(2) << "Toplevel URL is unchanged, not starting classification.";
261 classifier_page_text_.clear(); // we won't need this.
262 have_page_text_ = false;
263 return;
266 if (!have_page_text_) {
267 DVLOG(2) << "Not starting classification, there is no page text ready.";
268 return;
271 if (last_url_received_from_browser_ != stripped_last_load_url) {
272 // The browser has not yet confirmed that this URL should be classified,
273 // so defer classification for now. Note: the ref does not affect
274 // any of the browser's preclassification checks, so we don't require it
275 // to match.
276 DVLOG(2) << "Not starting classification, last url from browser is "
277 << last_url_received_from_browser_ << ", last finished load is "
278 << last_finished_load_url_;
279 // Keep classifier_page_text_, in case the browser notifies us later that
280 // we should classify the URL.
281 return;
284 DVLOG(2) << "Starting classification for " << last_finished_load_url_;
285 last_url_sent_to_classifier_ = last_finished_load_url_;
286 is_classifying_ = true;
287 classifier_->BeginClassification(
288 &classifier_page_text_,
289 base::Bind(&PhishingClassifierDelegate::ClassificationDone,
290 base::Unretained(this)));
293 } // namespace safe_browsing