Add a function to create a bookmark app from a WebApplicationInfo.
[chromium-blink-merge.git] / chrome / renderer / safe_browsing / phishing_classifier_browsertest.cc
blob9617d68ef4358a453da6f254d168c10a045d3d70
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/renderer/safe_browsing/phishing_classifier.h"
7 #include <string>
9 #include "base/bind.h"
10 #include "base/command_line.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/strings/string16.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "chrome/common/chrome_switches.h"
15 #include "chrome/common/safe_browsing/client_model.pb.h"
16 #include "chrome/common/safe_browsing/csd.pb.h"
17 #include "chrome/renderer/safe_browsing/features.h"
18 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
19 #include "chrome/renderer/safe_browsing/murmurhash3_util.h"
20 #include "chrome/renderer/safe_browsing/scorer.h"
21 #include "chrome/test/base/in_process_browser_test.h"
22 #include "chrome/test/base/ui_test_utils.h"
23 #include "content/public/renderer/render_view.h"
24 #include "crypto/sha2.h"
25 #include "net/dns/mock_host_resolver.h"
26 #include "net/test/embedded_test_server/embedded_test_server.h"
27 #include "net/test/embedded_test_server/http_response.h"
28 #include "testing/gmock/include/gmock/gmock.h"
29 #include "url/gurl.h"
31 using ::testing::AllOf;
32 using ::testing::Contains;
33 using ::testing::Not;
34 using ::testing::Pair;
namespace {

// Routing ID used to look up the RenderView under test.
// The first RenderFrame is routing ID 1, and the first RenderView is 2.
const int kRenderViewRoutingId = 2;

}  // namespace
43 namespace safe_browsing {
45 class PhishingClassifierTest : public InProcessBrowserTest {
46 protected:
47 PhishingClassifierTest()
48 : url_tld_token_net_(features::kUrlTldToken + std::string("net")),
49 page_link_domain_phishing_(features::kPageLinkDomain +
50 std::string("phishing.com")),
51 page_term_login_(features::kPageTerm + std::string("login")) {
54 virtual void SetUpCommandLine(CommandLine* command_line) OVERRIDE {
55 command_line->AppendSwitch(switches::kSingleProcess);
56 #if defined(OS_WIN)
57 // Don't want to try to create a GPU process.
58 command_line->AppendSwitch(switches::kDisableAcceleratedCompositing);
59 #endif
62 virtual void SetUpOnMainThread() OVERRIDE {
63 // Construct a model to test with. We include one feature from each of
64 // the feature extractors, which allows us to verify that they all ran.
65 ClientSideModel model;
67 model.add_hashes(crypto::SHA256HashString(url_tld_token_net_));
68 model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_));
69 model.add_hashes(crypto::SHA256HashString(page_term_login_));
70 model.add_hashes(crypto::SHA256HashString("login"));
71 model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken +
72 std::string("net")));
73 model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain +
74 std::string("phishing.com")));
75 model.add_hashes(crypto::SHA256HashString(features::kPageTerm +
76 std::string("login")));
77 model.add_hashes(crypto::SHA256HashString("login"));
79 // Add a default rule with a non-phishy weight.
80 ClientSideModel::Rule* rule = model.add_rule();
81 rule->set_weight(-1.0);
83 // To give a phishy score, the total weight needs to be >= 0
84 // (0.5 when converted to a probability). This will only happen
85 // if all of the listed features are present.
86 rule = model.add_rule();
87 rule->add_feature(0);
88 rule->add_feature(1);
89 rule->add_feature(2);
90 rule->set_weight(1.0);
92 model.add_page_term(3);
93 model.set_murmur_hash_seed(2777808611U);
94 model.add_page_word(MurmurHash3String("login", model.murmur_hash_seed()));
95 model.set_max_words_per_term(1);
97 clock_ = new MockFeatureExtractorClock;
98 scorer_.reset(Scorer::Create(model.SerializeAsString()));
99 ASSERT_TRUE(scorer_.get());
101 classifier_.reset(new PhishingClassifier(
102 content::RenderView::FromRoutingID(kRenderViewRoutingId),
103 clock_));
106 virtual void TearDownOnMainThread() OVERRIDE {
107 content::RunAllPendingInMessageLoop();
110 // Helper method to start phishing classification and wait for it to
111 // complete. Returns the true if the page is classified as phishy and
112 // false otherwise.
113 bool RunPhishingClassifier(const base::string16* page_text,
114 float* phishy_score,
115 FeatureMap* features) {
116 ClientPhishingRequest verdict;
117 // The classifier accesses the RenderView and must run in the RenderThread.
118 PostTaskToInProcessRendererAndWait(
119 base::Bind(&PhishingClassifierTest::DoRunPhishingClassifier,
120 base::Unretained(this),
121 page_text, phishy_score, features, &verdict));
122 return verdict.is_phishing();
125 void DoRunPhishingClassifier(const base::string16* page_text,
126 float* phishy_score,
127 FeatureMap* features,
128 ClientPhishingRequest* verdict) {
129 *phishy_score = PhishingClassifier::kInvalidScore;
130 features->Clear();
132 // Force synchronous behavior for ease of unittesting.
133 base::RunLoop run_loop;
134 classifier_->BeginClassification(
135 page_text,
136 base::Bind(&PhishingClassifierTest::ClassificationFinished,
137 base::Unretained(this), &run_loop, verdict));
138 content::RunThisRunLoop(&run_loop);
140 *phishy_score = verdict->client_score();
141 for (int i = 0; i < verdict->feature_map_size(); ++i) {
142 features->AddRealFeature(verdict->feature_map(i).name(),
143 verdict->feature_map(i).value());
147 // Completion callback for classification.
148 void ClassificationFinished(base::RunLoop* run_loop,
149 ClientPhishingRequest* verdict_out,
150 const ClientPhishingRequest& verdict) {
151 *verdict_out = verdict; // Copy the verdict.
152 run_loop->Quit();
155 scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_;
156 net::test_server::EmbeddedTestServer* embedded_test_server() {
157 // TODO(ajwong): Merge this into BrowserTestBase.
158 if (!embedded_test_server_) {
159 embedded_test_server_.reset(new net::test_server::EmbeddedTestServer());
160 embedded_test_server_->RegisterRequestHandler(
161 base::Bind(&PhishingClassifierTest::HandleRequest,
162 base::Unretained(this)));
163 CHECK(embedded_test_server_->InitializeAndWaitUntilReady());
165 return embedded_test_server_.get();
168 void LoadHtml(const std::string& host, const std::string& content) {
169 GURL::Replacements replace_host;
170 replace_host.SetHostStr(host);
171 response_content_ = content;
172 ui_test_utils::NavigateToURL(
173 browser(),
174 embedded_test_server()->base_url().ReplaceComponents(replace_host));
177 void LoadHtmlPost(const std::string& host, const std::string& content) {
178 GURL::Replacements replace_host;
179 replace_host.SetHostStr(host);
180 response_content_ = content;
181 ui_test_utils::NavigateToURLWithPost(
182 browser(),
183 embedded_test_server()->base_url().ReplaceComponents(replace_host));
186 scoped_ptr<net::test_server::HttpResponse>
187 HandleRequest(const net::test_server::HttpRequest& request) {
188 scoped_ptr<net::test_server::BasicHttpResponse> http_response(
189 new net::test_server::BasicHttpResponse());
190 http_response->set_code(net::HTTP_OK);
191 http_response->set_content_type("text/html");
192 http_response->set_content(response_content_);
193 return http_response.PassAs<net::test_server::HttpResponse>();
196 std::string response_content_;
197 scoped_ptr<Scorer> scorer_;
198 scoped_ptr<PhishingClassifier> classifier_;
199 MockFeatureExtractorClock* clock_; // Owned by classifier_.
201 // Features that are in the model.
202 const std::string url_tld_token_net_;
203 const std::string page_link_domain_phishing_;
204 const std::string page_term_login_;
207 // This test flakes on Mac with force compositing mode.
208 // http://crbug.com/316709
209 #if defined(OS_MACOSX)
210 #define MAYBE_TestClassification DISABLED_TestClassification
211 #else
212 #define MAYBE_TestClassification TestClassification
213 #endif
214 IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_TestClassification) {
215 host_resolver()->AddRule("*", "127.0.0.1");
217 // No scorer yet, so the classifier is not ready.
218 ASSERT_FALSE(classifier_->is_ready());
220 // Now set the scorer.
221 classifier_->set_phishing_scorer(scorer_.get());
222 ASSERT_TRUE(classifier_->is_ready());
224 // This test doesn't exercise the extraction timing.
225 EXPECT_CALL(*clock_, Now())
226 .WillRepeatedly(::testing::Return(base::TimeTicks::Now()));
228 base::string16 page_text = base::ASCIIToUTF16("login");
229 float phishy_score;
230 FeatureMap features;
232 LoadHtml("host.net",
233 "<html><body><a href=\"http://phishing.com/\">login</a></body></html>");
234 EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features));
235 // Note: features.features() might contain other features that simply aren't
236 // in the model.
237 EXPECT_THAT(features.features(),
238 AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
239 Contains(Pair(page_link_domain_phishing_, 1.0)),
240 Contains(Pair(page_term_login_, 1.0))));
241 EXPECT_FLOAT_EQ(0.5, phishy_score);
243 // Change the link domain to something non-phishy.
244 LoadHtml("host.net",
245 "<html><body><a href=\"http://safe.com/\">login</a></body></html>");
246 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
247 EXPECT_THAT(features.features(),
248 AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
249 Contains(Pair(page_term_login_, 1.0))));
250 EXPECT_THAT(features.features(),
251 Not(Contains(Pair(page_link_domain_phishing_, 1.0))));
252 EXPECT_GE(phishy_score, 0.0);
253 EXPECT_LT(phishy_score, 0.5);
255 // Extraction should fail for this case since there is no TLD.
256 LoadHtml("localhost", "<html><body>content</body></html>");
257 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
258 EXPECT_EQ(0U, features.features().size());
259 EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
261 // Extraction should also fail for this case because the URL is not http.
262 net::SpawnedTestServer https_server(
263 net::SpawnedTestServer::TYPE_HTTPS,
264 net::SpawnedTestServer::kLocalhost,
265 base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
266 ASSERT_TRUE(https_server.Start());
267 std::string host_str("host.net"); // Must outlive replace_host.
268 GURL::Replacements replace_host;
269 replace_host.SetHostStr(host_str);
270 GURL test_url = https_server.GetURL("/files/title1.html");
271 ui_test_utils::NavigateToURL(browser(),
272 test_url.ReplaceComponents(replace_host));
273 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
274 EXPECT_EQ(0U, features.features().size());
275 EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
277 // Extraction should fail for this case because the URL is a POST request.
278 LoadHtmlPost("host.net", "<html><body>content</body></html>");
279 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
280 EXPECT_EQ(0U, features.features().size());
281 EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
284 IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, DisableDetection) {
285 // No scorer yet, so the classifier is not ready.
286 EXPECT_FALSE(classifier_->is_ready());
288 // Now set the scorer.
289 classifier_->set_phishing_scorer(scorer_.get());
290 EXPECT_TRUE(classifier_->is_ready());
292 // Set a NULL scorer, which turns detection back off.
293 classifier_->set_phishing_scorer(NULL);
294 EXPECT_FALSE(classifier_->is_ready());
297 } // namespace safe_browsing