1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Note that although this is not a "browser" test, it runs as part of
6 // browser_tests. This is because WebKit does not work properly if it is
7 // shutdown and re-initialized. Since browser_tests runs each test in a
8 // new process, this avoids the problem.
10 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
12 #include "base/bind.h"
13 #include "base/callback.h"
14 #include "base/command_line.h"
15 #include "base/compiler_specific.h"
16 #include "base/memory/weak_ptr.h"
17 #include "base/message_loop/message_loop.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/time/time.h"
20 #include "chrome/browser/ui/browser.h"
21 #include "chrome/browser/ui/tabs/tab_strip_model.h"
22 #include "chrome/common/chrome_switches.h"
23 #include "chrome/renderer/safe_browsing/features.h"
24 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
25 #include "chrome/renderer/safe_browsing/test_utils.h"
26 #include "chrome/test/base/in_process_browser_test.h"
27 #include "chrome/test/base/ui_test_utils.h"
28 #include "content/public/browser/interstitial_page.h"
29 #include "content/public/browser/web_contents.h"
30 #include "content/public/renderer/render_view.h"
31 #include "content/public/test/browser_test_utils.h"
32 #include "content/public/test/test_utils.h"
33 #include "net/dns/mock_host_resolver.h"
34 #include "net/test/embedded_test_server/embedded_test_server.h"
35 #include "net/test/embedded_test_server/http_request.h"
36 #include "net/test/embedded_test_server/http_response.h"
37 #include "testing/gmock/include/gmock/gmock.h"
38 #include "third_party/WebKit/public/platform/WebString.h"
39 #include "third_party/WebKit/public/web/WebFrame.h"
40 #include "third_party/WebKit/public/web/WebScriptSource.h"
41 #include "third_party/WebKit/public/web/WebView.h"
43 using ::testing::DoAll
;
44 using ::testing::Invoke
;
45 using ::testing::Return
;
// Routing ID passed to content::RenderView::FromRoutingID() whenever this
// file needs the RenderView under test.
49 // The first RenderFrame is routing ID 1, and the first RenderView is 2.
50 const int kRenderViewRoutingId
= 2;
54 namespace safe_browsing
{
// Browser-test fixture for PhishingDOMFeatureExtractor. It serves canned HTML
// from an embedded test server, drives the extractor with a mock clock, and
// provides helpers for loading pages and running feature extraction.
56 class PhishingDOMFeatureExtractorTest
: public InProcessBrowserTest
{
// Returns the WebContents of the active tab in the test browser's tab strip.
58 content::WebContents
* GetWebContents() {
59 return browser()->tab_strip_model()->GetActiveWebContents();
62 // Helper for the SubframeRemoval test that posts a message to remove
63 // the iframe "frame1" from the document.
// The posted task is RemoveIframe, bound through a weak pointer obtained from
// weak_factory_, and it is queued on base::MessageLoop::current().
64 void ScheduleRemoveIframe() {
65 base::MessageLoop::current()->PostTask(
67 base::Bind(&PhishingDOMFeatureExtractorTest::RemoveIframe
,
68 weak_factory_
.GetWeakPtr()));
// The constructor hands |this| to weak_factory_ so that weak pointers to the
// fixture can be created (ScheduleRemoveIframe uses one). The destructor has
// an empty body.
72 PhishingDOMFeatureExtractorTest() : weak_factory_(this) {}
74 ~PhishingDOMFeatureExtractorTest() override
{}
// Appends the command-line switches this fixture relies on: kSingleProcess
// and kDisableGpu.
// NOTE(review): single-process mode is presumably what lets the test reach
// the renderer-side RenderView directly from the test process -- confirm.
76 void SetUpCommandLine(base::CommandLine
* command_line
) override
{
77 command_line
->AppendSwitch(switches::kSingleProcess
);
79 // Don't want to try to create a GPU process.
80 command_line
->AppendSwitch(switches::kDisableGpu
);
// Creates the extractor for the RenderView looked up by kRenderViewRoutingId,
// giving it the mock clock, then starts the embedded test server (asserting
// that it started) and adds the host resolver rule "*" -> "127.0.0.1".
84 void SetUpOnMainThread() override
{
85 extractor_
.reset(new PhishingDOMFeatureExtractor(
86 content::RenderView::FromRoutingID(kRenderViewRoutingId
), &clock_
));
88 ASSERT_TRUE(StartTestServer());
89 host_resolver()->AddRule("*", "127.0.0.1");
92 // Runs the DOMFeatureExtractor on the RenderView, waiting for the
93 // completion callback. Returns the success boolean from the callback.
// The work itself is bound to ExtractFeaturesInternal and handed to
// PostTaskToInProcessRendererAndWait(). |features| is the caller-owned map
// that receives the extracted features.
94 bool ExtractFeatures(FeatureMap
* features
) {
96 PostTaskToInProcessRendererAndWait(
97 base::Bind(&PhishingDOMFeatureExtractorTest::ExtractFeaturesInternal
,
98 base::Unretained(this),
// Starts extraction on extractor_. The completion callback is ExtractionDone,
// bound together with the quit closure of a freshly created
// content::MessageLoopRunner.
103 void ExtractFeaturesInternal(FeatureMap
* features
) {
104 scoped_refptr
<content::MessageLoopRunner
> message_loop
=
105 new content::MessageLoopRunner
;
106 extractor_
->ExtractFeatures(
108 base::Bind(&PhishingDOMFeatureExtractorTest::ExtractionDone
,
109 base::Unretained(this),
110 message_loop
->QuitClosure()));
114 // Completion callback for feature extraction.
// |quit_closure| is the MessageLoopRunner quit closure that
// ExtractFeaturesInternal binds into this callback.
115 void ExtractionDone(const base::Closure
& quit_closure
,
121 // Does the actual work of removing the iframe "frame1" from the document.
// Looks up the RenderView by kRenderViewRoutingId, asserts that its WebView
// has a main frame, and runs a removeChild() script in that frame.
122 void RemoveIframe() {
123 content::RenderView
* render_view
=
124 content::RenderView::FromRoutingID(kRenderViewRoutingId
);
125 blink::WebFrame
* main_frame
= render_view
->GetWebView()->mainFrame();
126 ASSERT_TRUE(main_frame
);
127 main_frame
->executeScript(
129 "document.body.removeChild(document.getElementById('frame1'));"));
// Creates the embedded test server (CHECKing that none exists yet), registers
// HandleRequest as its request handler, and returns the result of
// InitializeAndWaitUntilReady().
132 bool StartTestServer() {
133 CHECK(!embedded_test_server_
);
134 embedded_test_server_
.reset(new net::test_server::EmbeddedTestServer());
135 embedded_test_server_
->RegisterRequestHandler(
136 base::Bind(&PhishingDOMFeatureExtractorTest::HandleRequest
,
137 base::Unretained(this)));
138 return embedded_test_server_
->InitializeAndWaitUntilReady();
// Request handler registered with the embedded test server. The lookup key
// into responses_ is "http://" + the request's Host header + its relative URL.
// Returns an empty response pointer when the request has no Host header or
// when responses_ has no entry for the key; otherwise returns an HTTP_OK
// response with content type "text/html" and the stored body.
141 scoped_ptr
<net::test_server::HttpResponse
> HandleRequest(
142 const net::test_server::HttpRequest
& request
) {
143 std::map
<std::string
, std::string
>::const_iterator host_it
=
144 request
.headers
.find("Host");
// No Host header: nothing to match against, so hand back an empty response.
145 if (host_it
== request
.headers
.end())
146 return scoped_ptr
<net::test_server::HttpResponse
>();
149 std::string("http://") + host_it
->second
+ request
.relative_url
;
150 std::map
<std::string
, std::string
>::const_iterator it
=
151 responses_
.find(url
);
// Unknown URL: hand back an empty response.
152 if (it
== responses_
.end())
153 return scoped_ptr
<net::test_server::HttpResponse
>();
155 scoped_ptr
<net::test_server::BasicHttpResponse
> http_response(
156 new net::test_server::BasicHttpResponse());
157 http_response
->set_code(net::HTTP_OK
);
158 http_response
->set_content_type("text/html");
159 http_response
->set_content(it
->second
);
160 return http_response
.Pass();
// Returns the embedded test server's base URL with its host replaced by
// |host| and its path replaced by |path|.
163 GURL
GetURL(const std::string
& host
, const std::string
& path
) {
164 GURL::Replacements replace
;
165 replace
.SetHostStr(host
);
166 replace
.SetPathStr(path
);
167 return embedded_test_server_
->base_url().ReplaceComponents(replace
);
170 // Returns the URL that was loaded.
// Registers |content| in responses_ under the URL built by GetURL() for
// |host| with an empty path, then navigates the browser to that URL.
171 GURL
LoadHtml(const std::string
& host
, const std::string
& content
) {
172 GURL
url(GetURL(host
, ""));
173 responses_
[url
.spec()] = content
;
174 ui_test_utils::NavigateToURL(browser(), url
);
178 // Map of url -> response body for network requests from the renderer.
179 // Any urls not in this map are served a 404 error.
180 std::map
<std::string
, std::string
> responses_
;
// Server that answers renderer requests through HandleRequest; created in
// StartTestServer().
182 scoped_ptr
<net::test_server::EmbeddedTestServer
> embedded_test_server_
;
// Mock clock handed to the extractor in SetUpOnMainThread(); each test sets
// expectations on its Now() calls.
183 MockFeatureExtractorClock clock_
;
// Extractor under test; created in SetUpOnMainThread().
184 scoped_ptr
<PhishingDOMFeatureExtractor
> extractor_
;
185 bool success_
; // holds the success value from ExtractFeatures
// Source of the weak pointer used by ScheduleRemoveIframe().
186 base::WeakPtrFactory
<PhishingDOMFeatureExtractorTest
> weak_factory_
;
// Checks the form- and input-related features over several pages: form
// presence, the frequency of form actions pointing at another domain, and the
// text / checkbox / radio / password input flags.
189 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest
, FormFeatures
) {
190 // This test doesn't exercise the extraction timing.
191 EXPECT_CALL(clock_
, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
193 FeatureMap expected_features
;
194 expected_features
.AddBooleanFeature(features::kPageHasForms
);
195 expected_features
.AddRealFeature(features::kPageActionOtherDomainFreq
, 0.25);
196 expected_features
.AddBooleanFeature(features::kPageHasTextInputs
);
197 expected_features
.AddBooleanFeature(features::kPageHasCheckInputs
);
203 "<form action=\"query\"><input type=text><input type=checkbox></form>"
204 "<form action=\"http://cgi.host.com/submit\"></form>"
205 "<form action=\"http://other.com/\"></form>"
206 "<form action=\"query\"></form>"
207 "<form></form></body></html>");
208 ASSERT_TRUE(ExtractFeatures(&features
));
209 ExpectFeatureMapsAreEqual(features
, expected_features
);
// Radio and password inputs outside of any form.
211 expected_features
.Clear();
212 expected_features
.AddBooleanFeature(features::kPageHasRadioInputs
);
213 expected_features
.AddBooleanFeature(features::kPageHasPswdInputs
);
219 "<input type=\"radio\"><input type=password></body></html>");
220 ASSERT_TRUE(ExtractFeatures(&features
));
221 ExpectFeatureMapsAreEqual(features
, expected_features
);
// An <input> with no type attribute is expected to count as a text input.
223 expected_features
.Clear();
224 expected_features
.AddBooleanFeature(features::kPageHasTextInputs
);
229 "<html><head><body><input></body></html>");
230 ASSERT_TRUE(ExtractFeatures(&features
));
231 ExpectFeatureMapsAreEqual(features
, expected_features
);
// An <input> with an unrecognized type is also expected to count as a text
// input.
233 expected_features
.Clear();
234 expected_features
.AddBooleanFeature(features::kPageHasTextInputs
);
239 "<html><head><body><input type=\"invalid\"></body></html>");
240 ASSERT_TRUE(ExtractFeatures(&features
));
241 ExpectFeatureMapsAreEqual(features
, expected_features
);
// Checks the link features (external-link frequency, secure-link frequency
// and the per-domain link flag), first on a page loaded over HTTP and then on
// a page served by an HTTPS SpawnedTestServer.
244 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest
, LinkFeatures
) {
245 // This test doesn't exercise the extraction timing.
246 EXPECT_CALL(clock_
, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
248 FeatureMap expected_features
;
249 expected_features
.AddRealFeature(features::kPageExternalLinksFreq
, 0.5);
250 expected_features
.AddRealFeature(features::kPageSecureLinksFreq
, 0.0);
251 expected_features
.AddBooleanFeature(features::kPageLinkDomain
+
252 std::string("chromium.org"));
258 "<a href=\"http://www2.host.com/abc\">link</a>"
259 "<a name=page_anchor></a>"
260 "<a href=\"http://www.chromium.org/\">chromium</a>"
262 ASSERT_TRUE(ExtractFeatures(&features
));
263 ExpectFeatureMapsAreEqual(features
, expected_features
);
// Second pass: expectations for the page served over HTTPS.
265 expected_features
.Clear();
266 expected_features
.AddRealFeature(features::kPageExternalLinksFreq
, 0.25);
267 expected_features
.AddRealFeature(features::kPageSecureLinksFreq
, 0.5);
268 expected_features
.AddBooleanFeature(features::kPageLinkDomain
+
269 std::string("chromium.org"));
271 net::SpawnedTestServer
https_server(
272 net::SpawnedTestServer::TYPE_HTTPS
,
273 net::SpawnedTestServer::kLocalhost
,
274 base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
275 ASSERT_TRUE(https_server
.Start());
277 // The PhishingDOMFeatureExtractor depends on URLs being domains and not IPs,
// so the URL below is built from the host name host.com plus the HTTPS test
// server's port.
279 std::string url_str
= "https://host.com:";
280 url_str
+= base::IntToString(https_server
.host_port_pair().port());
281 url_str
+= "/files/safe_browsing/secure_link_features.html";
282 ui_test_utils::NavigateToURL(browser(), GURL(url_str
));
284 // Click through the certificate error interstitial.
// NOTE(review): the pointer returned by GetInterstitialPage() is used without
// a null check -- confirm an interstitial is always showing at this point.
285 content::InterstitialPage
* interstitial_page
=
286 GetWebContents()->GetInterstitialPage();
287 interstitial_page
->Proceed();
288 content::WaitForLoadStop(GetWebContents());
291 ASSERT_TRUE(ExtractFeatures(&features
));
292 ExpectFeatureMapsAreEqual(features
, expected_features
);
// Checks the script-count flags and the other-domain image frequency, first
// on a page with two empty <script> tags and then on a page served by an
// HTTPS SpawnedTestServer.
295 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest
,
296 ScriptAndImageFeatures
) {
297 // This test doesn't exercise the extraction timing.
298 EXPECT_CALL(clock_
, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
300 FeatureMap expected_features
;
301 expected_features
.AddBooleanFeature(features::kPageNumScriptTagsGTOne
);
306 "<html><head><script></script><script></script></head></html>");
307 ASSERT_TRUE(ExtractFeatures(&features
));
308 ExpectFeatureMapsAreEqual(features
, expected_features
);
// Second pass: expectations for the page served over HTTPS.
310 expected_features
.Clear();
311 expected_features
.AddBooleanFeature(features::kPageNumScriptTagsGTOne
);
312 expected_features
.AddBooleanFeature(features::kPageNumScriptTagsGTSix
);
313 expected_features
.AddRealFeature(features::kPageImgOtherDomainFreq
, 0.5);
316 net::SpawnedTestServer
https_server(
317 net::SpawnedTestServer::TYPE_HTTPS
,
318 net::SpawnedTestServer::kLocalhost
,
319 base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
320 ASSERT_TRUE(https_server
.Start());
322 // The PhishingDOMFeatureExtractor depends on URLs being domains and not IPs,
// so the URL below is built from the host name host.com plus the HTTPS test
// server's port.
324 std::string url_str
= "https://host.com:";
325 url_str
+= base::IntToString(https_server
.host_port_pair().port());
326 url_str
+= "/files/safe_browsing/secure_script_and_image.html";
327 ui_test_utils::NavigateToURL(browser(), GURL(url_str
));
329 // Click through the certificate error interstitial.
// NOTE(review): the pointer returned by GetInterstitialPage() is used without
// a null check -- confirm an interstitial is always showing at this point.
330 content::InterstitialPage
* interstitial_page
=
331 GetWebContents()->GetInterstitialPage();
332 interstitial_page
->Proceed();
333 content::WaitForLoadStop(GetWebContents());
335 ASSERT_TRUE(ExtractFeatures(&features
));
336 ExpectFeatureMapsAreEqual(features
, expected_features
);
// Loads a top-level page on host.com that embeds iframes pointing at
// host2.com and host3.com (host2.com itself embeds nested.html) and checks
// the combined feature map.
339 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest
, SubFrames
) {
340 // This test doesn't exercise the extraction timing.
341 EXPECT_CALL(clock_
, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
343 // Test that features are aggregated across all frames.
345 std::string port
= base::IntToString(embedded_test_server_
->port());
// Canned bodies for the subframe documents, keyed by the URL the embedded
// test server will be asked for.
346 responses_
[GetURL("host2.com", "").spec()] =
347 "<html><head><script></script><body>"
348 "<form action=\"http://host4.com/\"><input type=checkbox></form>"
349 "<form action=\"http://host2.com/submit\"></form>"
350 "<a href=\"http://www.host2.com/home\">link</a>"
351 "<iframe src=\"nested.html\"></iframe>"
354 responses_
[GetURL("host2.com", "nested.html").spec()] =
355 "<html><body><input type=password>"
356 "<a href=\"https://host4.com/\">link</a>"
357 "<a href=\"relative\">another</a>"
360 responses_
[GetURL("host3.com", "").spec()] =
361 "<html><head><script></script><body>"
362 "<img src=\"http://host.com/123.png\">"
365 FeatureMap expected_features
;
366 expected_features
.AddBooleanFeature(features::kPageHasForms
);
367 // Form action domains are compared to the URL of the document they're in,
368 // not the URL of the toplevel page. So http://host2.com/ has two form
369 // actions, one of which is external.
370 expected_features
.AddRealFeature(features::kPageActionOtherDomainFreq
, 0.5);
371 expected_features
.AddBooleanFeature(features::kPageHasTextInputs
);
372 expected_features
.AddBooleanFeature(features::kPageHasPswdInputs
);
373 expected_features
.AddBooleanFeature(features::kPageHasCheckInputs
);
374 expected_features
.AddRealFeature(features::kPageExternalLinksFreq
, 0.25);
375 expected_features
.AddBooleanFeature(features::kPageLinkDomain
+
376 std::string("host4.com"));
377 expected_features
.AddRealFeature(features::kPageSecureLinksFreq
, 0.25);
378 expected_features
.AddBooleanFeature(features::kPageNumScriptTagsGTOne
);
379 expected_features
.AddRealFeature(features::kPageImgOtherDomainFreq
, 1.0);
383 "<html><body><input type=text><a href=\"info.html\">link</a>"
384 "<iframe src=\"http://host2.com:");
388 "<iframe src=\"http://host3.com:");
390 html
+= std::string("/\"></iframe></body></html>");
392 LoadHtml("host.com", html
);
393 ASSERT_TRUE(ExtractFeatures(&features
));
394 ExpectFeatureMapsAreEqual(features
, expected_features
);
397 // Test flakes with LSAN enabled. See http://crbug.com/373155.
398 #if defined(LEAK_SANITIZER)
399 #define MAYBE_Continuation DISABLED_Continuation
401 #define MAYBE_Continuation Continuation
// Exercises chunked extraction with a scripted clock. In the first pass the
// mocked times push the extractor through two continuations and extraction is
// expected to succeed. In the second pass the mocked times exceed the total
// time budget and extraction is expected to fail.
403 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest
, MAYBE_Continuation
) {
404 // For this test, we'll cause the feature extraction to run multiple
405 // iterations by incrementing the clock.
407 // This page has a total of 50 elements. For the external forms feature to
408 // be computed correctly, the extractor has to examine the whole document.
409 // Note: the empty HEAD is important -- WebKit will synthesize a HEAD if
410 // there isn't one present, which can be confusing for the element counts.
411 std::string response
= "<html><head></head><body>"
412 "<form action=\"ondomain\"></form>";
413 for (int i
= 0; i
< 45; ++i
) {
414 response
.append("<p>");
416 response
.append("<form action=\"http://host2.com/\"></form></body></html>");
418 // Advance the clock 6 ms every 10 elements processed, 10 ms between chunks.
419 // Note that this assumes kClockCheckGranularity = 10 and
420 // kMaxTimePerChunkMs = 10.
421 base::TimeTicks now
= base::TimeTicks::Now();
422 EXPECT_CALL(clock_
, Now())
423 // Time check at the start of extraction.
424 .WillOnce(Return(now
))
425 // Time check at the start of the first chunk of work.
426 .WillOnce(Return(now
))
427 // Time check after the first 10 elements.
428 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(6)))
429 // Time check after the next 10 elements. This is over the chunk
430 // time limit, so a continuation task will be posted.
431 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(12)))
432 // Time check at the start of the second chunk of work.
433 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(22)))
434 // Time check after resuming iteration for the second chunk.
435 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(24)))
436 // Time check after the next 10 elements.
437 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(30)))
438 // Time check after the next 10 elements. This will trigger another
439 // continuation task.
440 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(36)))
441 // Time check at the start of the third chunk of work.
442 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(46)))
443 // Time check after resuming iteration for the third chunk.
444 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(48)))
445 // Time check after the last 10 elements.
446 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(54)))
447 // A final time check for the histograms.
448 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(56)));
// Two forms with actions, one of them on another domain, hence 0.5.
450 FeatureMap expected_features
;
451 expected_features
.AddBooleanFeature(features::kPageHasForms
);
452 expected_features
.AddRealFeature(features::kPageActionOtherDomainFreq
, 0.5);
455 LoadHtml("host.com", response
);
456 ASSERT_TRUE(ExtractFeatures(&features
));
457 ExpectFeatureMapsAreEqual(features
, expected_features
);
458 // Make sure none of the mock expectations carry over to the next test.
459 ::testing::Mock::VerifyAndClearExpectations(&clock_
);
461 // Now repeat the test with the same page, but advance the clock faster so
462 // that the extraction time exceeds the maximum total time for the feature
463 // extractor. Extraction should fail. Note that this assumes
464 // kMaxTotalTimeMs = 500.
465 EXPECT_CALL(clock_
, Now())
466 // Time check at the start of extraction.
467 .WillOnce(Return(now
))
468 // Time check at the start of the first chunk of work.
469 .WillOnce(Return(now
))
470 // Time check after the first 10 elements.
471 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(300)))
472 // Time check at the start of the second chunk of work.
473 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(350)))
474 // Time check after resuming iteration for the second chunk.
475 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(360)))
476 // Time check after the next 10 elements. This is over the limit.
477 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(600)))
478 // A final time check for the histograms.
479 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(620)));
482 EXPECT_FALSE(ExtractFeatures(&features
));
// Removes the iframe "frame1" while extraction is paused between chunks and
// checks that extraction still succeeds with the expected features.
485 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest
, SubframeRemoval
) {
486 // In this test, we'll advance the feature extractor so that it is positioned
487 // inside an iframe, and have it pause due to exceeding the chunk time limit.
488 // Then, prior to continuation, the iframe is removed from the document.
489 // As currently implemented, this should finish extraction from the removed
// iframe's document: the expectations below still include the password input
// that only appears in frame.html.
491 responses_
[GetURL("host.com", "frame.html").spec()] =
492 "<html><body><p><p><p><input type=password></body></html>";
494 base::TimeTicks now
= base::TimeTicks::Now();
495 EXPECT_CALL(clock_
, Now())
496 // Time check at the start of extraction.
497 .WillOnce(Return(now
))
498 // Time check at the start of the first chunk of work.
499 .WillOnce(Return(now
))
500 // Time check after the first 10 elements. Enough time has passed
501 // to stop extraction. Schedule the iframe removal to happen as soon as
502 // the feature extractor returns control to the message loop.
504 Invoke(this, &PhishingDOMFeatureExtractorTest::ScheduleRemoveIframe
),
505 Return(now
+ base::TimeDelta::FromMilliseconds(21))))
506 // Time check at the start of the second chunk of work.
507 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(25)))
508 // Time check after resuming iteration for the second chunk.
509 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(27)))
510 // A final time check for the histograms.
511 .WillOnce(Return(now
+ base::TimeDelta::FromMilliseconds(33)));
513 FeatureMap expected_features
;
514 expected_features
.AddBooleanFeature(features::kPageHasForms
);
515 expected_features
.AddBooleanFeature(features::kPageHasPswdInputs
);
520 "<html><head></head><body>"
521 "<iframe src=\"frame.html\" id=\"frame1\"></iframe>"
522 "<form></form></body></html>");
523 ASSERT_TRUE(ExtractFeatures(&features
));
524 ExpectFeatureMapsAreEqual(features
, expected_features
);
527 } // namespace safe_browsing