1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Note that although this is not a "browser" test, it runs as part of
6 // browser_tests. This is because WebKit does not work properly if it is
7 // shutdown and re-initialized. Since browser_tests runs each test in a
8 // new process, this avoids the problem.
10 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
13 #include "base/bind.h"
14 #include "base/callback.h"
15 #include "base/command_line.h"
16 #include "base/compiler_specific.h"
17 #include "base/location.h"
18 #include "base/memory/weak_ptr.h"
19 #include "base/single_thread_task_runner.h"
20 #include "base/strings/string_number_conversions.h"
21 #include "base/thread_task_runner_handle.h"
22 #include "base/time/time.h"
23 #include "chrome/browser/ui/browser.h"
24 #include "chrome/browser/ui/tabs/tab_strip_model.h"
25 #include "chrome/common/chrome_switches.h"
26 #include "chrome/renderer/safe_browsing/features.h"
27 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
28 #include "chrome/renderer/safe_browsing/test_utils.h"
29 #include "chrome/test/base/in_process_browser_test.h"
30 #include "chrome/test/base/ui_test_utils.h"
31 #include "content/public/browser/interstitial_page.h"
32 #include "content/public/browser/render_view_host.h"
33 #include "content/public/browser/web_contents.h"
34 #include "content/public/renderer/render_view.h"
35 #include "content/public/test/browser_test_utils.h"
36 #include "content/public/test/test_utils.h"
37 #include "net/dns/mock_host_resolver.h"
38 #include "net/test/embedded_test_server/embedded_test_server.h"
39 #include "net/test/embedded_test_server/http_request.h"
40 #include "net/test/embedded_test_server/http_response.h"
41 #include "testing/gmock/include/gmock/gmock.h"
42 #include "third_party/WebKit/public/platform/WebString.h"
43 #include "third_party/WebKit/public/web/WebFrame.h"
44 #include "third_party/WebKit/public/web/WebScriptSource.h"
45 #include "third_party/WebKit/public/web/WebView.h"
47 using ::testing::DoAll
;
48 using ::testing::Invoke
;
49 using ::testing::Return
;
51 namespace safe_browsing
{
// Test fixture for PhishingDOMFeatureExtractor, built on InProcessBrowserTest.
53 class PhishingDOMFeatureExtractorTest
: public InProcessBrowserTest
{
// Returns the WebContents of the active tab in the test browser's tab strip.
55 content::WebContents
* GetWebContents() {
56 return browser()->tab_strip_model()->GetActiveWebContents();
59 // Helper for the SubframeRemoval test that posts a message to remove
60 // the iframe "frame1" from the document.
// The task is posted to the current thread's task runner and is bound to a
// WeakPtr obtained from weak_factory_ rather than to a raw |this| pointer.
61 void ScheduleRemoveIframe() {
62 base::ThreadTaskRunnerHandle::Get()->PostTask(
63 FROM_HERE
, base::Bind(&PhishingDOMFeatureExtractorTest::RemoveIframe
,
64 weak_factory_
.GetWeakPtr()));
// Initializes the routing id to MSG_ROUTING_NONE (the real value is assigned
// in SetUpOnMainThread()) and binds weak_factory_ to this fixture.
68 PhishingDOMFeatureExtractorTest()
69 : render_view_routing_id_(MSG_ROUTING_NONE
), weak_factory_(this) {}
// Empty destructor; it performs no explicit cleanup of its own.
71 ~PhishingDOMFeatureExtractorTest() override
{}
// Appends the switches this fixture needs to |command_line|:
// kSingleProcess and kDisableGpu.
// NOTE(review): kSingleProcess is presumably what lets the tests reach the
// RenderView directly from the test process -- confirm.
73 void SetUpCommandLine(base::CommandLine
* command_line
) override
{
74 command_line
->AppendSwitch(switches::kSingleProcess
);
76 // Don't want to try to create a GPU process.
77 command_line
->AppendSwitch(switches::kDisableGpu
);
// Per-test setup: records the routing id of the active tab's RenderViewHost,
// creates the extractor with the mock clock, starts the embedded test server
// (asserting that it started), and adds a host resolver rule that maps every
// host name ("*") to 127.0.0.1.
81 void SetUpOnMainThread() override
{
82 render_view_routing_id_
=
83 GetWebContents()->GetRenderViewHost()->GetRoutingID();
84 extractor_
.reset(new PhishingDOMFeatureExtractor(&clock_
));
86 ASSERT_TRUE(StartTestServer());
87 host_resolver()->AddRule("*", "127.0.0.1");
90 // Runs the DOMFeatureExtractor on the RenderView, waiting for the
91 // completion callback. Returns the success boolean from the callback.
// The work is handed to ExtractFeaturesInternal through
// PostTaskToInProcessRendererAndWait, bound with an unretained |this|.
// NOTE(review): the remaining bind arguments and the return statement are not
// visible in this view of the file.
92 bool ExtractFeatures(FeatureMap
* features
) {
94 PostTaskToInProcessRendererAndWait(
95 base::Bind(&PhishingDOMFeatureExtractorTest::ExtractFeaturesInternal
,
96 base::Unretained(this),
// Looks up the RenderView for render_view_routing_id_ and returns the main
// frame of its WebView. The RenderView pointer is dereferenced without a null
// check, so the routing id must refer to a live view.
101 blink::WebFrame
* GetWebFrame() {
102 content::RenderView
* render_view
=
103 content::RenderView::FromRoutingID(render_view_routing_id_
);
104 return render_view
->GetWebView()->mainFrame();
// Starts feature extraction on the main frame's document, writing results
// into |features|. ExtractionDone is passed as the completion callback and is
// given the quit closure of a freshly created MessageLoopRunner.
// NOTE(review): the rest of the body (presumably running the message loop) is
// not visible in this view of the file.
107 void ExtractFeaturesInternal(FeatureMap
* features
) {
108 scoped_refptr
<content::MessageLoopRunner
> message_loop
=
109 new content::MessageLoopRunner
;
111 extractor_
->ExtractFeatures(
112 GetWebFrame()->document(), features
,
113 base::Bind(&PhishingDOMFeatureExtractorTest::ExtractionDone
,
114 base::Unretained(this), message_loop
->QuitClosure()));
118 // Completion callback for feature extraction.
// |quit_closure| is the MessageLoopRunner quit closure bound by
// ExtractFeaturesInternal.
// NOTE(review): the remaining parameters and the body are not visible in this
// view of the file.
119 void ExtractionDone(const base::Closure
& quit_closure
,
125 // Does the actual work of removing the iframe "frame1" from the document.
// Asserts that the main frame exists, then runs a script in it that removes
// the element with id 'frame1' from document.body.
// NOTE(review): the line that wraps the script string for executeScript() is
// not visible in this view of the file.
126 void RemoveIframe() {
127 blink::WebFrame
* main_frame
= GetWebFrame();
128 ASSERT_TRUE(main_frame
);
129 main_frame
->executeScript(
131 "document.body.removeChild(document.getElementById('frame1'));"));
// Creates the embedded test server, registers HandleRequest as its request
// handler, and returns the result of InitializeAndWaitUntilReady().
// CHECKs that no server has been created yet, so it may only be called once
// per fixture instance.
134 bool StartTestServer() {
135 CHECK(!embedded_test_server_
);
136 embedded_test_server_
.reset(new net::test_server::EmbeddedTestServer());
137 embedded_test_server_
->RegisterRequestHandler(
138 base::Bind(&PhishingDOMFeatureExtractorTest::HandleRequest
,
139 base::Unretained(this)));
140 return embedded_test_server_
->InitializeAndWaitUntilReady();
// Request handler for the embedded test server. Serves bodies out of
// responses_, keyed by the full URL of the request.
// Returns a null scoped_ptr when the request has no "Host" header or when the
// URL has no entry in responses_; otherwise returns an HTTP_OK response with
// content type "text/html" and the stored body.
143 scoped_ptr
<net::test_server::HttpResponse
> HandleRequest(
144 const net::test_server::HttpRequest
& request
) {
145 std::map
<std::string
, std::string
>::const_iterator host_it
=
146 request
.headers
.find("Host");
147 if (host_it
== request
.headers
.end())
148 return scoped_ptr
<net::test_server::HttpResponse
>();
// The lookup key is "http://" + Host header value + relative URL.
// NOTE(review): the declaration this expression initializes (|url|, used
// below) is not visible in this view of the file.
151 std::string("http://") + host_it
->second
+ request
.relative_url
;
152 std::map
<std::string
, std::string
>::const_iterator it
=
153 responses_
.find(url
);
154 if (it
== responses_
.end())
155 return scoped_ptr
<net::test_server::HttpResponse
>();
157 scoped_ptr
<net::test_server::BasicHttpResponse
> http_response(
158 new net::test_server::BasicHttpResponse());
159 http_response
->set_code(net::HTTP_OK
);
160 http_response
->set_content_type("text/html");
161 http_response
->set_content(it
->second
);
162 return http_response
.Pass();
// Returns the embedded test server's base URL with its host replaced by
// |host| and its path replaced by |path|; the other components of the base
// URL are left as they are.
165 GURL
GetURL(const std::string
& host
, const std::string
& path
) {
166 GURL::Replacements replace
;
167 replace
.SetHostStr(host
);
168 replace
.SetPathStr(path
);
169 return embedded_test_server_
->base_url().ReplaceComponents(replace
);
172 // Returns the URL that was loaded.
// Registers |content| in responses_ as the body for GetURL(host, ""), then
// navigates the browser to that URL.
// NOTE(review): the return statement is not visible in this view of the file.
173 GURL
LoadHtml(const std::string
& host
, const std::string
& content
) {
174 GURL
url(GetURL(host
, ""));
175 responses_
[url
.spec()] = content
;
176 ui_test_utils::NavigateToURL(browser(), url
);
// Routing id of the active tab's render view. MSG_ROUTING_NONE until
// SetUpOnMainThread() assigns it; GetWebFrame() uses it to find the RenderView.
180 int32 render_view_routing_id_
;
182 // Map of url -> response body for network requests from the renderer.
183 // Any urls not in this map are served a 404 error.
184 std::map
<std::string
, std::string
> responses_
;
// Server created by StartTestServer(); its requests go to HandleRequest().
186 scoped_ptr
<net::test_server::EmbeddedTestServer
> embedded_test_server_
;
// Mock clock handed to the extractor; tests script its Now() return values.
187 MockFeatureExtractorClock clock_
;
// Extractor under test, created in SetUpOnMainThread().
188 scoped_ptr
<PhishingDOMFeatureExtractor
> extractor_
;
189 bool success_
; // holds the success value from ExtractFeatures
// Provides the WeakPtr that ScheduleRemoveIframe() binds its posted task to.
190 base::WeakPtrFactory
<PhishingDOMFeatureExtractorTest
> weak_factory_
;
// Checks the form-related features (form presence, action URLs, input types)
// against several small pages, comparing the extracted map with
// |expected_features| after each extraction.
// NOTE(review): the |features| declaration and the calls that load each page
// are not visible in this view of the file.
193 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest
, FormFeatures
) {
194 // This test doesn't exercise the extraction timing.
195 EXPECT_CALL(clock_
, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
197 FeatureMap expected_features
;
198 expected_features
.AddBooleanFeature(features::kPageHasForms
);
199 expected_features
.AddRealFeature(features::kPageActionOtherDomainFreq
, 0.25);
200 expected_features
.AddBooleanFeature(features::kPageHasTextInputs
);
201 expected_features
.AddBooleanFeature(features::kPageHasCheckInputs
);
202 expected_features
.AddBooleanFeature(features::kPageActionURL
+
203 std::string("http://cgi.host.com/submit"));
204 expected_features
.AddBooleanFeature(features::kPageActionURL
+
205 std::string("http://other.com/"));
// The relative action "query" is expected as an absolute URL on host.com at
// the embedded test server's port.
206 expected_features
.AddBooleanFeature(features::kPageActionURL
+
207 std::string("http://host.com:") +
208 base::IntToString(embedded_test_server_
->port()) +
209 std::string("/query"));
215 "<form action=\"query\"><input type=text><input type=checkbox></form>"
216 "<form action=\"http://cgi.host.com/submit\"></form>"
217 "<form action=\"http://other.com/\"></form>"
218 "<form action=\"query\"></form>"
219 "<form></form></body></html>");
220 ASSERT_TRUE(ExtractFeatures(&features
));
221 ExpectFeatureMapsAreEqual(features
, expected_features
);
// Second page: radio and password inputs.
223 expected_features
.Clear();
224 expected_features
.AddBooleanFeature(features::kPageHasRadioInputs
);
225 expected_features
.AddBooleanFeature(features::kPageHasPswdInputs
);
231 "<input type=\"radio\"><input type=password></body></html>");
232 ASSERT_TRUE(ExtractFeatures(&features
));
233 ExpectFeatureMapsAreEqual(features
, expected_features
);
// Third page: an <input> with no type attribute is expected to count as a
// text input.
235 expected_features
.Clear();
236 expected_features
.AddBooleanFeature(features::kPageHasTextInputs
);
241 "<html><head><body><input></body></html>");
242 ASSERT_TRUE(ExtractFeatures(&features
));
243 ExpectFeatureMapsAreEqual(features
, expected_features
);
// Fourth page: an <input> with an unrecognized type is also expected to
// count as a text input.
245 expected_features
.Clear();
246 expected_features
.AddBooleanFeature(features::kPageHasTextInputs
);
251 "<html><head><body><input type=\"invalid\"></body></html>");
252 ASSERT_TRUE(ExtractFeatures(&features
));
253 ExpectFeatureMapsAreEqual(features
, expected_features
);
// Checks the link features (external-link frequency, secure-link frequency,
// linked domains), first on a page of plain links and then on a page served
// over HTTPS by a SpawnedTestServer.
// NOTE(review): the |features| declaration and the call that loads the first
// page are not visible in this view of the file.
256 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest
, LinkFeatures
) {
257 // This test doesn't exercise the extraction timing.
258 EXPECT_CALL(clock_
, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
260 FeatureMap expected_features
;
261 expected_features
.AddRealFeature(features::kPageExternalLinksFreq
, 0.5);
262 expected_features
.AddRealFeature(features::kPageSecureLinksFreq
, 0.0);
263 expected_features
.AddBooleanFeature(features::kPageLinkDomain
+
264 std::string("chromium.org"));
270 "<a href=\"http://www2.host.com/abc\">link</a>"
271 "<a name=page_anchor></a>"
272 "<a href=\"http://www.chromium.org/\">chromium</a>"
274 ASSERT_TRUE(ExtractFeatures(&features
));
275 ExpectFeatureMapsAreEqual(features
, expected_features
);
// Second part: expectations for secure_link_features.html, loaded over HTTPS.
277 expected_features
.Clear();
278 expected_features
.AddRealFeature(features::kPageExternalLinksFreq
, 0.25);
279 expected_features
.AddRealFeature(features::kPageSecureLinksFreq
, 0.5);
280 expected_features
.AddBooleanFeature(features::kPageLinkDomain
+
281 std::string("chromium.org"));
283 net::SpawnedTestServer
https_server(
284 net::SpawnedTestServer::TYPE_HTTPS
,
285 net::SpawnedTestServer::kLocalhost
,
286 base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
287 ASSERT_TRUE(https_server
.Start());
289 // The PhishingDOMFeatureExtractor depends on URLs being domains and not IPs,
// so the page is requested through the host.com name at the HTTPS server's
// port.
291 std::string url_str
= "https://host.com:";
292 url_str
+= base::IntToString(https_server
.host_port_pair().port());
293 url_str
+= "/files/safe_browsing/secure_link_features.html";
294 ui_test_utils::NavigateToURL(browser(), GURL(url_str
));
296 // Click through the certificate error interstitial.
// The interstitial pointer is used without a null check.
297 content::InterstitialPage
* interstitial_page
=
298 GetWebContents()->GetInterstitialPage();
299 interstitial_page
->Proceed();
300 content::WaitForLoadStop(GetWebContents());
303 ASSERT_TRUE(ExtractFeatures(&features
));
304 ExpectFeatureMapsAreEqual(features
, expected_features
);
307 // Flaky on Win/Linux. https://crbug.com/373155.
// MAYBE_ScriptAndImageFeatures maps to the DISABLED_ name on those platforms.
// NOTE(review): the #else / #endif lines of this conditional are not visible
// in this view of the file.
308 #if defined(OS_WIN) || defined(OS_LINUX)
309 #define MAYBE_ScriptAndImageFeatures DISABLED_ScriptAndImageFeatures
311 #define MAYBE_ScriptAndImageFeatures ScriptAndImageFeatures
// Checks the script-tag count and image-domain features, first on a page with
// two <script> tags and then on a page served over HTTPS by a
// SpawnedTestServer.
// NOTE(review): the |features| declaration and the call that loads the first
// page are not visible in this view of the file.
313 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest
,
314 MAYBE_ScriptAndImageFeatures
) {
315 // This test doesn't exercise the extraction timing.
316 EXPECT_CALL(clock_
, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
318 FeatureMap expected_features
;
319 expected_features
.AddBooleanFeature(features::kPageNumScriptTagsGTOne
);
324 "<html><head><script></script><script></script></head></html>");
325 ASSERT_TRUE(ExtractFeatures(&features
));
326 ExpectFeatureMapsAreEqual(features
, expected_features
);
// Second part: expectations for secure_script_and_image.html.
328 expected_features
.Clear();
329 expected_features
.AddBooleanFeature(features::kPageNumScriptTagsGTOne
);
330 expected_features
.AddBooleanFeature(features::kPageNumScriptTagsGTSix
);
331 expected_features
.AddRealFeature(features::kPageImgOtherDomainFreq
, 0.5);
334 net::SpawnedTestServer
https_server(
335 net::SpawnedTestServer::TYPE_HTTPS
,
336 net::SpawnedTestServer::kLocalhost
,
337 base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
338 ASSERT_TRUE(https_server
.Start());
340 // The PhishingDOMFeatureExtractor depends on URLs being domains and not IPs,
// so the page is requested through the host.com name at the HTTPS server's
// port.
342 std::string url_str
= "https://host.com:";
343 url_str
+= base::IntToString(https_server
.host_port_pair().port());
344 url_str
+= "/files/safe_browsing/secure_script_and_image.html";
345 ui_test_utils::NavigateToURL(browser(), GURL(url_str
));
347 // Click through the certificate error interstitial.
// The interstitial pointer is used without a null check.
348 content::InterstitialPage
* interstitial_page
=
349 GetWebContents()->GetInterstitialPage();
350 interstitial_page
->Proceed();
351 content::WaitForLoadStop(GetWebContents());
353 ASSERT_TRUE(ExtractFeatures(&features
));
354 ExpectFeatureMapsAreEqual(features
, expected_features
);
// Loads a top-level page on host.com that embeds iframes from host2.com and
// host3.com (host2.com in turn embeds nested.html) and checks the extracted
// feature map against expectations that combine content from those documents.
// NOTE(review): the |features| and |html| declarations, and several lines of
// the page markup, are not visible in this view of the file.
357 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest
, SubFrames
) {
358 // This test doesn't exercise the extraction timing.
359 EXPECT_CALL(clock_
, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
361 // Test that features are aggregated across all frames.
363 std::string port
= base::IntToString(embedded_test_server_
->port());
// Bodies served by HandleRequest for the subframe documents.
364 responses_
[GetURL("host2.com", "").spec()] =
365 "<html><head><script></script><body>"
366 "<form action=\"http://host4.com/\"><input type=checkbox></form>"
367 "<form action=\"http://host2.com/submit\"></form>"
368 "<a href=\"http://www.host2.com/home\">link</a>"
369 "<iframe src=\"nested.html\"></iframe>"
372 responses_
[GetURL("host2.com", "nested.html").spec()] =
373 "<html><body><input type=password>"
374 "<a href=\"https://host4.com/\">link</a>"
375 "<a href=\"relative\">another</a>"
378 responses_
[GetURL("host3.com", "").spec()] =
379 "<html><head><script></script><body>"
380 "<img src=\"http://host.com/123.png\">"
383 FeatureMap expected_features
;
384 expected_features
.AddBooleanFeature(features::kPageHasForms
);
385 // Form action domains are compared to the URL of the document they're in,
386 // not the URL of the toplevel page. So http://host2.com/ has two form
387 // actions, one of which is external.
388 expected_features
.AddRealFeature(features::kPageActionOtherDomainFreq
, 0.5);
389 expected_features
.AddBooleanFeature(features::kPageHasTextInputs
);
390 expected_features
.AddBooleanFeature(features::kPageHasPswdInputs
);
391 expected_features
.AddBooleanFeature(features::kPageHasCheckInputs
);
392 expected_features
.AddRealFeature(features::kPageExternalLinksFreq
, 0.25);
393 expected_features
.AddBooleanFeature(features::kPageLinkDomain
+
394 std::string("host4.com"));
395 expected_features
.AddRealFeature(features::kPageSecureLinksFreq
, 0.25);
396 expected_features
.AddBooleanFeature(features::kPageNumScriptTagsGTOne
);
397 expected_features
.AddRealFeature(features::kPageImgOtherDomainFreq
, 1.0);
398 expected_features
.AddBooleanFeature(features::kPageActionURL
+
399 std::string("http://host2.com/submit"));
400 expected_features
.AddBooleanFeature(features::kPageActionURL
+
401 std::string("http://host4.com/"));
// Top-level page markup: a text input, a relative link, and the iframes.
// NOTE(review): |port| is presumably appended after each "host:" prefix on
// lines not visible here.
405 "<html><body><input type=text><a href=\"info.html\">link</a>"
406 "<iframe src=\"http://host2.com:");
410 "<iframe src=\"http://host3.com:");
412 html
+= std::string("/\"></iframe></body></html>");
414 LoadHtml("host.com", html
);
415 ASSERT_TRUE(ExtractFeatures(&features
));
416 ExpectFeatureMapsAreEqual(features
, expected_features
);
419 // Test flakes with LSAN enabled. See http://crbug.com/373155.
// MAYBE_Continuation maps to the DISABLED_ name when LEAK_SANITIZER is defined.
// NOTE(review): the #else / #endif lines of this conditional are not visible
// in this view of the file.
420 #if defined(LEAK_SANITIZER)
421 #define MAYBE_Continuation DISABLED_Continuation
423 #define MAYBE_Continuation Continuation
// Scripts the mock clock so that extraction is split across several chunks of
// work and checks that it still succeeds with the expected features; then
// scripts a faster clock and checks that extraction reports failure.
// NOTE(review): the |features| declaration and the second page load are not
// visible in this view of the file.
425 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest
, MAYBE_Continuation
) {
426 // For this test, we'll cause the feature extraction to run multiple
427 // iterations by incrementing the clock.
429 // This page has a total of 50 elements. For the external forms feature to
430 // be computed correctly, the extractor has to examine the whole document.
431 // Note: the empty HEAD is important -- WebKit will synthesize a HEAD if
432 // there isn't one present, which can be confusing for the element counts.
433 std::string response
= "<html><head></head><body>"
434 "<form action=\"ondomain\"></form>";
// Pad the document with 45 <p> elements between the two forms.
435 for (int i
= 0; i
< 45; ++i
) {
436 response
.append("<p>");
438 response
.append("<form action=\"http://host2.com/\"></form></body></html>");
440 // Advance the clock 6 ms every 10 elements processed, 10 ms between chunks.
441 // Note that this assumes kClockCheckGranularity = 10 and
442 // kMaxTimePerChunkMs = 10.
443 base::TimeTicks now
= base::TimeTicks::Now();
444 EXPECT_CALL(clock_
, Now())
445 // Time check at the start of extraction.
446 .WillOnce(Return(now
))
447 // Time check at the start of the first chunk of work.
448 .WillOnce(Return(now
))
449 // Time check after the first 10 elements.
450 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(6)))
451 // Time check after the next 10 elements. This is over the chunk
452 // time limit, so a continuation task will be posted.
453 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(12)))
454 // Time check at the start of the second chunk of work.
455 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(22)))
456 // Time check after resuming iteration for the second chunk.
457 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(24)))
458 // Time check after the next 10 elements.
459 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(30)))
460 // Time check after the next 10 elements. This will trigger another
461 // continuation task.
462 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(36)))
463 // Time check at the start of the third chunk of work.
464 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(46)))
465 // Time check after resuming iteration for the third chunk.
466 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(48)))
467 // Time check after the last 10 elements.
468 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(54)))
469 // A final time check for the histograms.
470 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(56)));
472 FeatureMap expected_features
;
473 expected_features
.AddBooleanFeature(features::kPageHasForms
);
474 expected_features
.AddRealFeature(features::kPageActionOtherDomainFreq
, 0.5);
// The relative action "ondomain" is expected as an absolute URL on host.com
// at the embedded test server's port.
475 expected_features
.AddBooleanFeature(features::kPageActionURL
+
476 std::string("http://host.com:") +
477 base::IntToString(embedded_test_server_
->port()) +
478 std::string("/ondomain"));
479 expected_features
.AddBooleanFeature(features::kPageActionURL
+
480 std::string("http://host2.com/"));
483 LoadHtml("host.com", response
);
484 ASSERT_TRUE(ExtractFeatures(&features
));
485 ExpectFeatureMapsAreEqual(features
, expected_features
);
486 // Make sure none of the mock expectations carry over to the next test.
487 ::testing::Mock::VerifyAndClearExpectations(&clock_
);
489 // Now repeat the test with the same page, but advance the clock faster so
490 // that the extraction time exceeds the maximum total time for the feature
491 // extractor. Extraction should fail. Note that this assumes
492 // kMaxTotalTimeMs = 500.
493 EXPECT_CALL(clock_
, Now())
494 // Time check at the start of extraction.
495 .WillOnce(Return(now
))
496 // Time check at the start of the first chunk of work.
497 .WillOnce(Return(now
))
498 // Time check after the first 10 elements.
499 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(300)))
500 // Time check at the start of the second chunk of work.
501 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(350)))
502 // Time check after resuming iteration for the second chunk.
503 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(360)))
504 // Time check after the next 10 elements. This is over the limit.
505 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(600)))
506 // A final time check for the histograms.
507 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(620)));
510 EXPECT_FALSE(ExtractFeatures(&features
));
// Removes the iframe "frame1" from the document while extraction is paused
// between chunks, then checks that extraction still succeeds with the
// expected features.
// NOTE(review): the |features| declaration, the DoAll wrapper around the
// Invoke/Return pair, and the call that loads the page are not visible in
// this view of the file.
513 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest
, SubframeRemoval
) {
514 // In this test, we'll advance the feature extractor so that it is positioned
515 // inside an iframe, and have it pause due to exceeding the chunk time limit.
516 // Then, prior to continuation, the iframe is removed from the document.
517 // As currently implemented, this should finish extraction from the removed
// iframe's document: the expectations below still include the password input
// that appears only in frame.html.
519 responses_
[GetURL("host.com", "frame.html").spec()] =
520 "<html><body><p><p><p><input type=password></body></html>";
522 base::TimeTicks now
= base::TimeTicks::Now();
523 EXPECT_CALL(clock_
, Now())
524 // Time check at the start of extraction.
525 .WillOnce(Return(now
))
526 // Time check at the start of the first chunk of work.
527 .WillOnce(Return(now
))
528 // Time check after the first 10 elements. Enough time has passed
529 // to stop extraction. Schedule the iframe removal to happen as soon as
530 // the feature extractor returns control to the message loop.
532 Invoke(this, &PhishingDOMFeatureExtractorTest::ScheduleRemoveIframe
),
533 Return(now
+ base::TimeDelta::FromMilliseconds(21))))
534 // Time check at the start of the second chunk of work.
535 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(25)))
536 // Time check after resuming iteration for the second chunk.
537 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(27)))
538 // A final time check for the histograms.
539 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(33)));
541 FeatureMap expected_features
;
542 expected_features
.AddBooleanFeature(features::kPageHasForms
);
543 expected_features
.AddBooleanFeature(features::kPageHasPswdInputs
);
// Top-level page: the iframe that gets removed, followed by an empty form.
548 "<html><head></head><body>"
549 "<iframe src=\"frame.html\" id=\"frame1\"></iframe>"
550 "<form></form></body></html>");
551 ASSERT_TRUE(ExtractFeatures(&features
));
552 ExpectFeatureMapsAreEqual(features
, expected_features
);
555 } // namespace safe_browsing