1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/renderer/safe_browsing/phishing_classifier.h"
10 #include "base/command_line.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/strings/string16.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "chrome/common/chrome_switches.h"
15 #include "chrome/common/safe_browsing/client_model.pb.h"
16 #include "chrome/common/safe_browsing/csd.pb.h"
17 #include "chrome/renderer/safe_browsing/features.h"
18 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
19 #include "chrome/renderer/safe_browsing/murmurhash3_util.h"
20 #include "chrome/renderer/safe_browsing/scorer.h"
21 #include "chrome/test/base/in_process_browser_test.h"
22 #include "chrome/test/base/ui_test_utils.h"
23 #include "content/public/renderer/render_view.h"
24 #include "crypto/sha2.h"
25 #include "net/dns/mock_host_resolver.h"
26 #include "net/test/embedded_test_server/embedded_test_server.h"
27 #include "net/test/embedded_test_server/http_response.h"
28 #include "testing/gmock/include/gmock/gmock.h"
31 using ::testing::AllOf
;
32 using ::testing::Contains
;
34 using ::testing::Pair
;
38 // The first RenderFrame is routing ID 1, and the first RenderView is 2.
// The fixture passes this ID to content::RenderView::FromRoutingID() when it
// constructs the PhishingClassifier in SetUpOnMainThread().
39 const int kRenderViewRoutingId
= 2;
43 namespace safe_browsing
{
45 class PhishingClassifierTest
: public InProcessBrowserTest
{
// Builds the three feature-name strings used by the test model. Each one is a
// features::k* prefix constant with a value appended: "net" for the URL TLD
// token, "phishing.com" for the page link domain, and "login" for the page
// term.
47 PhishingClassifierTest()
48 : url_tld_token_net_(features::kUrlTldToken
+ std::string("net")),
49 page_link_domain_phishing_(features::kPageLinkDomain
+
50 std::string("phishing.com")),
51 page_term_login_(features::kPageTerm
+ std::string("login")) {
// Appends the switches this fixture needs to |command_line|:
// switches::kSingleProcess and switches::kDisableGpu.
// Presumably single-process mode is what lets RunPhishingClassifier() reach
// the renderer through PostTaskToInProcessRendererAndWait() -- TODO confirm.
// NOTE(review): some lines of this method are not visible in this view (for
// example, a platform guard around the GPU switch may be hidden); confirm
// against the full file before relying on this description.
54 virtual void SetUpCommandLine(CommandLine
* command_line
) OVERRIDE
{
55 command_line
->AppendSwitch(switches::kSingleProcess
);
57 // Don't want to try to create a GPU process.
58 command_line
->AppendSwitch(switches::kDisableGpu
);
// Builds the fixture state shared by every test: a small ClientSideModel, a
// Scorer created from its serialized form, a mock clock, and the
// PhishingClassifier under test. The scorer is created here but is not
// installed on the classifier in the visible lines; the tests do that
// themselves via set_phishing_scorer().
// NOTE(review): several lines of this method are not visible in this view
// (the features attached to the second rule, and the tail of the
// PhishingClassifier constructor call); confirm against the full file.
62 virtual void SetUpOnMainThread() OVERRIDE
{
63 // Construct a model to test with. We include one feature from each of
64 // the feature extractors, which allows us to verify that they all ran.
65 ClientSideModel model
;
// Hashes are appended in order: URL TLD token, page link domain, page term,
// and the bare word "login".
67 model
.add_hashes(crypto::SHA256HashString(url_tld_token_net_
));
68 model
.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_
));
69 model
.add_hashes(crypto::SHA256HashString(page_term_login_
));
70 model
.add_hashes(crypto::SHA256HashString("login"));
// NOTE(review): the next four add_hashes() calls appear to repeat the four
// hashes added above, with the feature strings built inline instead of taken
// from the members. They look redundant -- confirm before removing.
71 model
.add_hashes(crypto::SHA256HashString(features::kUrlTldToken
+
73 model
.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain
+
74 std::string("phishing.com")));
75 model
.add_hashes(crypto::SHA256HashString(features::kPageTerm
+
76 std::string("login")));
77 model
.add_hashes(crypto::SHA256HashString("login"));
79 // Add a default rule with a non-phishy weight.
80 ClientSideModel::Rule
* rule
= model
.add_rule();
81 rule
->set_weight(-1.0);
83 // To give a phishy score, the total weight needs to be >= 0
84 // (0.5 when converted to a probability). This will only happen
85 // if all of the listed features are present.
86 rule
= model
.add_rule();
90 rule
->set_weight(1.0);
// Presumably 3 is the index of the "login" hash added above -- TODO confirm
// against the ClientSideModel definition.
92 model
.add_page_term(3);
93 model
.set_murmur_hash_seed(2777808611U);
// The page word is hashed with the same seed that was just stored in the model.
94 model
.add_page_word(MurmurHash3String("login", model
.murmur_hash_seed()));
95 model
.set_max_words_per_term(1);
96 model
.set_max_shingles_per_page(100);
97 model
.set_shingle_size(3);
// Raw pointer: per the comment on the clock_ member, classifier_ owns it.
99 clock_
= new MockFeatureExtractorClock
;
100 scorer_
.reset(Scorer::Create(model
.SerializeAsString()));
// Abort setup if the serialized model did not produce a scorer.
101 ASSERT_TRUE(scorer_
.get());
// The classifier is attached to the RenderView looked up by routing ID.
103 classifier_
.reset(new PhishingClassifier(
104 content::RenderView::FromRoutingID(kRenderViewRoutingId
),
// Runs all work still pending in the message loop before the fixture is torn
// down.
// NOTE(review): the rest of this method is not visible in this view.
108 virtual void TearDownOnMainThread() OVERRIDE
{
109 content::RunAllPendingInMessageLoop();
112 // Helper method to start phishing classification and wait for it to
113 // complete. Returns true if the page is classified as phishy, i.e. the
// value of is_phishing() on the verdict filled in by DoRunPhishingClassifier().
// |page_text|, |phishy_score| and |features| are forwarded unchanged to
// DoRunPhishingClassifier(), which is posted to the in-process renderer; this
// call waits for that task before reading the verdict.
// NOTE(review): |phishy_score| is used below but its parameter declaration is
// not visible in this view -- confirm against the full file.
115 bool RunPhishingClassifier(const base::string16
* page_text
,
117 FeatureMap
* features
) {
118 ClientPhishingRequest verdict
;
119 // The classifier accesses the RenderView and must run in the RenderThread.
120 PostTaskToInProcessRendererAndWait(
121 base::Bind(&PhishingClassifierTest::DoRunPhishingClassifier
,
122 base::Unretained(this),
123 page_text
, phishy_score
, features
, &verdict
));
124 return verdict
.is_phishing();
// Task body posted by RunPhishingClassifier(). It resets |*phishy_score| to
// PhishingClassifier::kInvalidScore, starts classification with
// ClassificationFinished() as the completion callback, and runs |run_loop|.
// Once the loop returns it copies the verdict's client_score() into
// |*phishy_score| and every feature_map entry into |features|.
// NOTE(review): the |phishy_score| parameter declaration and the first
// argument to BeginClassification() are not visible in this view -- confirm
// against the full file.
127 void DoRunPhishingClassifier(const base::string16
* page_text
,
129 FeatureMap
* features
,
130 ClientPhishingRequest
* verdict
) {
131 *phishy_score
= PhishingClassifier::kInvalidScore
;
134 // Force synchronous behavior for ease of unittesting.
135 base::RunLoop run_loop
;
136 classifier_
->BeginClassification(
138 base::Bind(&PhishingClassifierTest::ClassificationFinished
,
139 base::Unretained(this), &run_loop
, verdict
));
140 content::RunThisRunLoop(&run_loop
);
// Publish the results from the verdict that the callback copied out.
142 *phishy_score
= verdict
->client_score();
143 for (int i
= 0; i
< verdict
->feature_map_size(); ++i
) {
144 features
->AddRealFeature(verdict
->feature_map(i
).name(),
145 verdict
->feature_map(i
).value());
149 // Completion callback for classification.
// Copies |verdict| into |*verdict_out| so the caller can inspect it after the
// callback returns.
// NOTE(review): |run_loop| is not used in the visible lines. Presumably it is
// quit here so that DoRunPhishingClassifier() resumes -- confirm against the
// full file.
150 void ClassificationFinished(base::RunLoop
* run_loop
,
151 ClientPhishingRequest
* verdict_out
,
152 const ClientPhishingRequest
& verdict
) {
153 *verdict_out
= verdict
; // Copy the verdict.
// Owns the embedded test server; stays null until embedded_test_server() is
// first called.
157 scoped_ptr
<net::test_server::EmbeddedTestServer
> embedded_test_server_
;
// Returns the embedded test server, creating it on first use. Creation
// registers HandleRequest() as the request handler and CHECKs that
// InitializeAndWaitUntilReady() succeeds. The returned pointer is not owned by
// the caller; embedded_test_server_ keeps ownership.
158 net::test_server::EmbeddedTestServer
* embedded_test_server() {
159 // TODO(ajwong): Merge this into BrowserTestBase.
160 if (!embedded_test_server_
) {
161 embedded_test_server_
.reset(new net::test_server::EmbeddedTestServer());
162 embedded_test_server_
->RegisterRequestHandler(
163 base::Bind(&PhishingClassifierTest::HandleRequest
,
164 base::Unretained(this)));
165 CHECK(embedded_test_server_
->InitializeAndWaitUntilReady());
167 return embedded_test_server_
.get();
// Stores |content| in response_content_, which HandleRequest() serves as the
// response body, then navigates to the embedded test server's base URL with
// its host component replaced by |host|.
// NOTE(review): the first argument to NavigateToURL() is not visible in this
// view -- confirm against the full file.
170 void LoadHtml(const std::string
& host
, const std::string
& content
) {
171 GURL::Replacements replace_host
;
172 replace_host
.SetHostStr(host
);
173 response_content_
= content
;
174 ui_test_utils::NavigateToURL(
176 embedded_test_server()->base_url().ReplaceComponents(replace_host
));
// Same as LoadHtml(), but navigates with NavigateToURLWithPost(): stores
// |content| in response_content_ and loads the embedded test server's base URL
// with its host component replaced by |host|.
// NOTE(review): the first argument to NavigateToURLWithPost() is not visible
// in this view -- confirm against the full file.
179 void LoadHtmlPost(const std::string
& host
, const std::string
& content
) {
180 GURL::Replacements replace_host
;
181 replace_host
.SetHostStr(host
);
182 response_content_
= content
;
183 ui_test_utils::NavigateToURLWithPost(
185 embedded_test_server()->base_url().ReplaceComponents(replace_host
));
// Request handler registered with the embedded test server. It builds an
// HTTP_OK response with content type "text/html" whose body is the current
// response_content_, and returns it as a generic HttpResponse. |request| is
// not referenced in the visible lines, so the same content is produced
// regardless of what was requested.
188 scoped_ptr
<net::test_server::HttpResponse
>
189 HandleRequest(const net::test_server::HttpRequest
& request
) {
190 scoped_ptr
<net::test_server::BasicHttpResponse
> http_response(
191 new net::test_server::BasicHttpResponse());
192 http_response
->set_code(net::HTTP_OK
);
193 http_response
->set_content_type("text/html");
194 http_response
->set_content(response_content_
);
195 return http_response
.PassAs
<net::test_server::HttpResponse
>();
// Body served by HandleRequest(); set by LoadHtml() and LoadHtmlPost().
198 std::string response_content_
;
// Scorer created in SetUpOnMainThread() from the serialized test model.
199 scoped_ptr
<Scorer
> scorer_
;
// Classifier under test; created in SetUpOnMainThread().
200 scoped_ptr
<PhishingClassifier
> classifier_
;
// Mock clock allocated in SetUpOnMainThread(); tests set expectations on it.
201 MockFeatureExtractorClock
* clock_
; // Owned by classifier_.
203 // Features that are in the model.
// Each is a features::k* prefix plus a value, built once in the constructor.
204 const std::string url_tld_token_net_
;
205 const std::string page_link_domain_phishing_
;
206 const std::string page_term_login_
;
209 // This test flakes on Mac with force compositing mode.
210 // http://crbug.com/316709
// NOTE(review): the two #define lines below are alternatives. The #else and
// #endif that separate and close them are not visible in this view.
211 #if defined(OS_MACOSX)
212 #define MAYBE_TestClassification DISABLED_TestClassification
214 #define MAYBE_TestClassification TestClassification
// End-to-end classification test. It installs the scorer, then checks five
// pages in turn: a phishy page, a page linking to a safe domain, a host with
// no TLD, an HTTPS page, and a page loaded via POST. For each page it checks
// the is_phishing result, the extracted features and the score.
// NOTE(review): the declarations of |phishy_score| and |features|, and the
// heads of the two LoadHtml() calls whose string-literal arguments appear
// below, are not visible in this view -- confirm against the full file.
216 IN_PROC_BROWSER_TEST_F(PhishingClassifierTest
, MAYBE_TestClassification
) {
// Resolve every host name to the loopback address so the test hosts reach the
// local test servers.
217 host_resolver()->AddRule("*", "127.0.0.1");
219 // No scorer yet, so the classifier is not ready.
220 ASSERT_FALSE(classifier_
->is_ready());
222 // Now set the scorer.
223 classifier_
->set_phishing_scorer(scorer_
.get());
224 ASSERT_TRUE(classifier_
->is_ready());
226 // This test doesn't exercise the extraction timing.
227 EXPECT_CALL(*clock_
, Now())
228 .WillRepeatedly(::testing::Return(base::TimeTicks::Now()));
230 base::string16 page_text
= base::ASCIIToUTF16("login");
235 "<html><body><a href=\"http://phishing.com/\">login</a></body></html>");
236 EXPECT_TRUE(RunPhishingClassifier(&page_text
, &phishy_score
, &features
));
237 // Note: features.features() might contain other features; the Contains()
// matchers below only require the listed ones to be present.
239 EXPECT_THAT(features
.features(),
240 AllOf(Contains(Pair(url_tld_token_net_
, 1.0)),
241 Contains(Pair(page_link_domain_phishing_
, 1.0)),
242 Contains(Pair(page_term_login_
, 1.0))));
// The model's rule weights are -1.0 and +1.0. Per the comment in
// SetUpOnMainThread(), a total weight of 0 corresponds to a probability of 0.5.
243 EXPECT_FLOAT_EQ(0.5, phishy_score
);
245 // Change the link domain to something non-phishy.
247 "<html><body><a href=\"http://safe.com/\">login</a></body></html>");
248 EXPECT_FALSE(RunPhishingClassifier(&page_text
, &phishy_score
, &features
));
249 EXPECT_THAT(features
.features(),
250 AllOf(Contains(Pair(url_tld_token_net_
, 1.0)),
251 Contains(Pair(page_term_login_
, 1.0))));
252 EXPECT_THAT(features
.features(),
253 Not(Contains(Pair(page_link_domain_phishing_
, 1.0))));
// The score must be a valid probability that is below the 0.5 phishy level.
254 EXPECT_GE(phishy_score
, 0.0);
255 EXPECT_LT(phishy_score
, 0.5);
257 // Extraction should fail for this case since there is no TLD.
258 LoadHtml("localhost", "<html><body>content</body></html>");
259 EXPECT_FALSE(RunPhishingClassifier(&page_text
, &phishy_score
, &features
));
260 EXPECT_EQ(0U, features
.features().size());
261 EXPECT_EQ(PhishingClassifier::kInvalidScore
, phishy_score
);
263 // Extraction should also fail for this case because the URL is not http.
264 net::SpawnedTestServer
https_server(
265 net::SpawnedTestServer::TYPE_HTTPS
,
266 net::SpawnedTestServer::kLocalhost
,
267 base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
268 ASSERT_TRUE(https_server
.Start());
269 std::string
host_str("host.net"); // Must outlive replace_host.
270 GURL::Replacements replace_host
;
271 replace_host
.SetHostStr(host_str
);
272 GURL test_url
= https_server
.GetURL("/files/title1.html");
273 ui_test_utils::NavigateToURL(browser(),
274 test_url
.ReplaceComponents(replace_host
));
275 EXPECT_FALSE(RunPhishingClassifier(&page_text
, &phishy_score
, &features
));
276 EXPECT_EQ(0U, features
.features().size());
277 EXPECT_EQ(PhishingClassifier::kInvalidScore
, phishy_score
);
279 // Extraction should fail for this case because the URL is a POST request.
280 LoadHtmlPost("host.net", "<html><body>content</body></html>");
281 EXPECT_FALSE(RunPhishingClassifier(&page_text
, &phishy_score
, &features
));
282 EXPECT_EQ(0U, features
.features().size());
283 EXPECT_EQ(PhishingClassifier::kInvalidScore
, phishy_score
);
286 // Test flakes with LSAN enabled. See http://crbug.com/373155.
// NOTE(review): the two #define lines below are alternatives. The #else and
// #endif that separate and close them are not visible in this view.
287 #if defined(LEAK_SANITIZER)
288 #define MAYBE_DisableDetection DISABLED_DisableDetection
290 #define MAYBE_DisableDetection DisableDetection
// Checks that is_ready() follows the installed scorer: false before any scorer
// is set, true once one is set, and false again after it is replaced with NULL.
292 IN_PROC_BROWSER_TEST_F(PhishingClassifierTest
, MAYBE_DisableDetection
) {
293 // No scorer yet, so the classifier is not ready.
294 EXPECT_FALSE(classifier_
->is_ready());
296 // Now set the scorer.
297 classifier_
->set_phishing_scorer(scorer_
.get());
298 EXPECT_TRUE(classifier_
->is_ready());
300 // Set a NULL scorer, which turns detection back off.
301 classifier_
->set_phishing_scorer(NULL
);
302 EXPECT_FALSE(classifier_
->is_ready());
305 } // namespace safe_browsing