Supervised user import: Listen for profile creation/deletion
[chromium-blink-merge.git] / chrome / renderer / safe_browsing / phishing_dom_feature_extractor_browsertest.cc
blob3d1814fcc771c3ffa3103015731859001a7a0d85
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Note that although this is not a "browser" test, it runs as part of
6 // browser_tests. This is because WebKit does not work properly if it is
7 // shutdown and re-initialized. Since browser_tests runs each test in a
8 // new process, this avoids the problem.
10 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
12 #include "base/bind.h"
13 #include "base/callback.h"
14 #include "base/command_line.h"
15 #include "base/compiler_specific.h"
16 #include "base/memory/weak_ptr.h"
17 #include "base/message_loop/message_loop.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/time/time.h"
20 #include "chrome/browser/ui/browser.h"
21 #include "chrome/browser/ui/tabs/tab_strip_model.h"
22 #include "chrome/common/chrome_switches.h"
23 #include "chrome/renderer/safe_browsing/features.h"
24 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
25 #include "chrome/renderer/safe_browsing/test_utils.h"
26 #include "chrome/test/base/in_process_browser_test.h"
27 #include "chrome/test/base/ui_test_utils.h"
28 #include "content/public/browser/interstitial_page.h"
29 #include "content/public/browser/web_contents.h"
30 #include "content/public/renderer/render_view.h"
31 #include "content/public/test/browser_test_utils.h"
32 #include "content/public/test/test_utils.h"
33 #include "net/dns/mock_host_resolver.h"
34 #include "net/test/embedded_test_server/embedded_test_server.h"
35 #include "net/test/embedded_test_server/http_request.h"
36 #include "net/test/embedded_test_server/http_response.h"
37 #include "testing/gmock/include/gmock/gmock.h"
38 #include "third_party/WebKit/public/platform/WebString.h"
39 #include "third_party/WebKit/public/web/WebFrame.h"
40 #include "third_party/WebKit/public/web/WebScriptSource.h"
41 #include "third_party/WebKit/public/web/WebView.h"
43 using ::testing::DoAll;
44 using ::testing::Invoke;
45 using ::testing::Return;
namespace {

// Routing ID used to look up the RenderView under test.
// The first RenderFrame is routing ID 1, and the first RenderView is 2.
const int kRenderViewRoutingId = 2;

}  // namespace

54 namespace safe_browsing {
56 class PhishingDOMFeatureExtractorTest : public InProcessBrowserTest {
57 public:
58 content::WebContents* GetWebContents() {
59 return browser()->tab_strip_model()->GetActiveWebContents();
62 // Helper for the SubframeRemoval test that posts a message to remove
63 // the iframe "frame1" from the document.
64 void ScheduleRemoveIframe() {
65 base::MessageLoop::current()->PostTask(
66 FROM_HERE,
67 base::Bind(&PhishingDOMFeatureExtractorTest::RemoveIframe,
68 weak_factory_.GetWeakPtr()));
71 protected:
72 PhishingDOMFeatureExtractorTest() : weak_factory_(this) {}
74 ~PhishingDOMFeatureExtractorTest() override {}
76 void SetUpCommandLine(base::CommandLine* command_line) override {
77 command_line->AppendSwitch(switches::kSingleProcess);
78 #if defined(OS_WIN)
79 // Don't want to try to create a GPU process.
80 command_line->AppendSwitch(switches::kDisableGpu);
81 #endif
84 void SetUpOnMainThread() override {
85 extractor_.reset(new PhishingDOMFeatureExtractor(
86 content::RenderView::FromRoutingID(kRenderViewRoutingId), &clock_));
88 ASSERT_TRUE(StartTestServer());
89 host_resolver()->AddRule("*", "127.0.0.1");
92 // Runs the DOMFeatureExtractor on the RenderView, waiting for the
93 // completion callback. Returns the success boolean from the callback.
94 bool ExtractFeatures(FeatureMap* features) {
95 success_ = false;
96 PostTaskToInProcessRendererAndWait(
97 base::Bind(&PhishingDOMFeatureExtractorTest::ExtractFeaturesInternal,
98 base::Unretained(this),
99 features));
100 return success_;
103 void ExtractFeaturesInternal(FeatureMap* features) {
104 scoped_refptr<content::MessageLoopRunner> message_loop =
105 new content::MessageLoopRunner;
106 extractor_->ExtractFeatures(
107 features,
108 base::Bind(&PhishingDOMFeatureExtractorTest::ExtractionDone,
109 base::Unretained(this),
110 message_loop->QuitClosure()));
111 message_loop->Run();
114 // Completion callback for feature extraction.
115 void ExtractionDone(const base::Closure& quit_closure,
116 bool success) {
117 success_ = success;
118 quit_closure.Run();
121 // Does the actual work of removing the iframe "frame1" from the document.
122 void RemoveIframe() {
123 content::RenderView* render_view =
124 content::RenderView::FromRoutingID(kRenderViewRoutingId);
125 blink::WebFrame* main_frame = render_view->GetWebView()->mainFrame();
126 ASSERT_TRUE(main_frame);
127 main_frame->executeScript(
128 blink::WebString(
129 "document.body.removeChild(document.getElementById('frame1'));"));
132 bool StartTestServer() {
133 CHECK(!embedded_test_server_);
134 embedded_test_server_.reset(new net::test_server::EmbeddedTestServer());
135 embedded_test_server_->RegisterRequestHandler(
136 base::Bind(&PhishingDOMFeatureExtractorTest::HandleRequest,
137 base::Unretained(this)));
138 return embedded_test_server_->InitializeAndWaitUntilReady();
141 scoped_ptr<net::test_server::HttpResponse> HandleRequest(
142 const net::test_server::HttpRequest& request) {
143 std::map<std::string, std::string>::const_iterator host_it =
144 request.headers.find("Host");
145 if (host_it == request.headers.end())
146 return scoped_ptr<net::test_server::HttpResponse>();
148 std::string url =
149 std::string("http://") + host_it->second + request.relative_url;
150 std::map<std::string, std::string>::const_iterator it =
151 responses_.find(url);
152 if (it == responses_.end())
153 return scoped_ptr<net::test_server::HttpResponse>();
155 scoped_ptr<net::test_server::BasicHttpResponse> http_response(
156 new net::test_server::BasicHttpResponse());
157 http_response->set_code(net::HTTP_OK);
158 http_response->set_content_type("text/html");
159 http_response->set_content(it->second);
160 return http_response.Pass();
163 GURL GetURL(const std::string& host, const std::string& path) {
164 GURL::Replacements replace;
165 replace.SetHostStr(host);
166 replace.SetPathStr(path);
167 return embedded_test_server_->base_url().ReplaceComponents(replace);
170 // Returns the URL that was loaded.
171 GURL LoadHtml(const std::string& host, const std::string& content) {
172 GURL url(GetURL(host, ""));
173 responses_[url.spec()] = content;
174 ui_test_utils::NavigateToURL(browser(), url);
175 return url;
178 // Map of url -> response body for network requests from the renderer.
179 // Any urls not in this map are served a 404 error.
180 std::map<std::string, std::string> responses_;
182 scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_;
183 MockFeatureExtractorClock clock_;
184 scoped_ptr<PhishingDOMFeatureExtractor> extractor_;
185 bool success_; // holds the success value from ExtractFeatures
186 base::WeakPtrFactory<PhishingDOMFeatureExtractorTest> weak_factory_;
189 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) {
190 // This test doesn't exercise the extraction timing.
191 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
193 FeatureMap expected_features;
194 expected_features.AddBooleanFeature(features::kPageHasForms);
195 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.25);
196 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
197 expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
199 FeatureMap features;
200 LoadHtml(
201 "host.com",
202 "<html><head><body>"
203 "<form action=\"query\"><input type=text><input type=checkbox></form>"
204 "<form action=\"http://cgi.host.com/submit\"></form>"
205 "<form action=\"http://other.com/\"></form>"
206 "<form action=\"query\"></form>"
207 "<form></form></body></html>");
208 ASSERT_TRUE(ExtractFeatures(&features));
209 ExpectFeatureMapsAreEqual(features, expected_features);
211 expected_features.Clear();
212 expected_features.AddBooleanFeature(features::kPageHasRadioInputs);
213 expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
215 features.Clear();
216 LoadHtml(
217 "host.com",
218 "<html><head><body>"
219 "<input type=\"radio\"><input type=password></body></html>");
220 ASSERT_TRUE(ExtractFeatures(&features));
221 ExpectFeatureMapsAreEqual(features, expected_features);
223 expected_features.Clear();
224 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
226 features.Clear();
227 LoadHtml(
228 "host.com",
229 "<html><head><body><input></body></html>");
230 ASSERT_TRUE(ExtractFeatures(&features));
231 ExpectFeatureMapsAreEqual(features, expected_features);
233 expected_features.Clear();
234 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
236 features.Clear();
237 LoadHtml(
238 "host.com",
239 "<html><head><body><input type=\"invalid\"></body></html>");
240 ASSERT_TRUE(ExtractFeatures(&features));
241 ExpectFeatureMapsAreEqual(features, expected_features);
244 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) {
245 // This test doesn't exercise the extraction timing.
246 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
248 FeatureMap expected_features;
249 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.5);
250 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.0);
251 expected_features.AddBooleanFeature(features::kPageLinkDomain +
252 std::string("chromium.org"));
254 FeatureMap features;
255 LoadHtml(
256 "www.host.com",
257 "<html><head><body>"
258 "<a href=\"http://www2.host.com/abc\">link</a>"
259 "<a name=page_anchor></a>"
260 "<a href=\"http://www.chromium.org/\">chromium</a>"
261 "</body></html");
262 ASSERT_TRUE(ExtractFeatures(&features));
263 ExpectFeatureMapsAreEqual(features, expected_features);
265 expected_features.Clear();
266 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
267 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.5);
268 expected_features.AddBooleanFeature(features::kPageLinkDomain +
269 std::string("chromium.org"));
271 net::SpawnedTestServer https_server(
272 net::SpawnedTestServer::TYPE_HTTPS,
273 net::SpawnedTestServer::kLocalhost,
274 base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
275 ASSERT_TRUE(https_server.Start());
277 // The PhishingDOMFeatureExtractor depends on URLs being domains and not IPs,
278 // so use a domain.
279 std::string url_str = "https://host.com:";
280 url_str += base::IntToString(https_server.host_port_pair().port());
281 url_str += "/files/safe_browsing/secure_link_features.html";
282 ui_test_utils::NavigateToURL(browser(), GURL(url_str));
284 // Click through the certificate error interstitial.
285 content::InterstitialPage* interstitial_page =
286 GetWebContents()->GetInterstitialPage();
287 interstitial_page->Proceed();
288 content::WaitForLoadStop(GetWebContents());
290 features.Clear();
291 ASSERT_TRUE(ExtractFeatures(&features));
292 ExpectFeatureMapsAreEqual(features, expected_features);
295 // Flaky on Win/Linux. https://crbug.com/373155.
296 #if defined(OS_WIN) || defined(OS_LINUX)
297 #define MAYBE_ScriptAndImageFeatures DISABLED_ScriptAndImageFeatures
298 #else
299 #define MAYBE_ScriptAndImageFeatures ScriptAndImageFeatures
300 #endif
301 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest,
302 MAYBE_ScriptAndImageFeatures) {
303 // This test doesn't exercise the extraction timing.
304 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
306 FeatureMap expected_features;
307 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
309 FeatureMap features;
310 LoadHtml(
311 "host.com",
312 "<html><head><script></script><script></script></head></html>");
313 ASSERT_TRUE(ExtractFeatures(&features));
314 ExpectFeatureMapsAreEqual(features, expected_features);
316 expected_features.Clear();
317 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
318 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTSix);
319 expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 0.5);
321 features.Clear();
322 net::SpawnedTestServer https_server(
323 net::SpawnedTestServer::TYPE_HTTPS,
324 net::SpawnedTestServer::kLocalhost,
325 base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
326 ASSERT_TRUE(https_server.Start());
328 // The PhishingDOMFeatureExtractor depends on URLs being domains and not IPs,
329 // so use a domain.
330 std::string url_str = "https://host.com:";
331 url_str += base::IntToString(https_server.host_port_pair().port());
332 url_str += "/files/safe_browsing/secure_script_and_image.html";
333 ui_test_utils::NavigateToURL(browser(), GURL(url_str));
335 // Click through the certificate error interstitial.
336 content::InterstitialPage* interstitial_page =
337 GetWebContents()->GetInterstitialPage();
338 interstitial_page->Proceed();
339 content::WaitForLoadStop(GetWebContents());
341 ASSERT_TRUE(ExtractFeatures(&features));
342 ExpectFeatureMapsAreEqual(features, expected_features);
345 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) {
346 // This test doesn't exercise the extraction timing.
347 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
349 // Test that features are aggregated across all frames.
351 std::string port = base::IntToString(embedded_test_server_->port());
352 responses_[GetURL("host2.com", "").spec()] =
353 "<html><head><script></script><body>"
354 "<form action=\"http://host4.com/\"><input type=checkbox></form>"
355 "<form action=\"http://host2.com/submit\"></form>"
356 "<a href=\"http://www.host2.com/home\">link</a>"
357 "<iframe src=\"nested.html\"></iframe>"
358 "<body></html>";
360 responses_[GetURL("host2.com", "nested.html").spec()] =
361 "<html><body><input type=password>"
362 "<a href=\"https://host4.com/\">link</a>"
363 "<a href=\"relative\">another</a>"
364 "</body></html>";
366 responses_[GetURL("host3.com", "").spec()] =
367 "<html><head><script></script><body>"
368 "<img src=\"http://host.com/123.png\">"
369 "</body></html>";
371 FeatureMap expected_features;
372 expected_features.AddBooleanFeature(features::kPageHasForms);
373 // Form action domains are compared to the URL of the document they're in,
374 // not the URL of the toplevel page. So http://host2.com/ has two form
375 // actions, one of which is external.
376 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
377 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
378 expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
379 expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
380 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
381 expected_features.AddBooleanFeature(features::kPageLinkDomain +
382 std::string("host4.com"));
383 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.25);
384 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
385 expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 1.0);
387 FeatureMap features;
388 std::string html(
389 "<html><body><input type=text><a href=\"info.html\">link</a>"
390 "<iframe src=\"http://host2.com:");
391 html += port;
392 html += std::string(
393 "/\"></iframe>"
394 "<iframe src=\"http://host3.com:");
395 html += port;
396 html += std::string("/\"></iframe></body></html>");
398 LoadHtml("host.com", html);
399 ASSERT_TRUE(ExtractFeatures(&features));
400 ExpectFeatureMapsAreEqual(features, expected_features);
403 // Test flakes with LSAN enabled. See http://crbug.com/373155.
404 #if defined(LEAK_SANITIZER)
405 #define MAYBE_Continuation DISABLED_Continuation
406 #else
407 #define MAYBE_Continuation Continuation
408 #endif
409 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, MAYBE_Continuation) {
410 // For this test, we'll cause the feature extraction to run multiple
411 // iterations by incrementing the clock.
413 // This page has a total of 50 elements. For the external forms feature to
414 // be computed correctly, the extractor has to examine the whole document.
415 // Note: the empty HEAD is important -- WebKit will synthesize a HEAD if
416 // there isn't one present, which can be confusing for the element counts.
417 std::string response = "<html><head></head><body>"
418 "<form action=\"ondomain\"></form>";
419 for (int i = 0; i < 45; ++i) {
420 response.append("<p>");
422 response.append("<form action=\"http://host2.com/\"></form></body></html>");
424 // Advance the clock 6 ms every 10 elements processed, 10 ms between chunks.
425 // Note that this assumes kClockCheckGranularity = 10 and
426 // kMaxTimePerChunkMs = 10.
427 base::TimeTicks now = base::TimeTicks::Now();
428 EXPECT_CALL(clock_, Now())
429 // Time check at the start of extraction.
430 .WillOnce(Return(now))
431 // Time check at the start of the first chunk of work.
432 .WillOnce(Return(now))
433 // Time check after the first 10 elements.
434 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(6)))
435 // Time check after the next 10 elements. This is over the chunk
436 // time limit, so a continuation task will be posted.
437 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(12)))
438 // Time check at the start of the second chunk of work.
439 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(22)))
440 // Time check after resuming iteration for the second chunk.
441 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(24)))
442 // Time check after the next 10 elements.
443 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(30)))
444 // Time check after the next 10 elements. This will trigger another
445 // continuation task.
446 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(36)))
447 // Time check at the start of the third chunk of work.
448 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(46)))
449 // Time check after resuming iteration for the third chunk.
450 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(48)))
451 // Time check after the last 10 elements.
452 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(54)))
453 // A final time check for the histograms.
454 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(56)));
456 FeatureMap expected_features;
457 expected_features.AddBooleanFeature(features::kPageHasForms);
458 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
460 FeatureMap features;
461 LoadHtml("host.com", response);
462 ASSERT_TRUE(ExtractFeatures(&features));
463 ExpectFeatureMapsAreEqual(features, expected_features);
464 // Make sure none of the mock expectations carry over to the next test.
465 ::testing::Mock::VerifyAndClearExpectations(&clock_);
467 // Now repeat the test with the same page, but advance the clock faster so
468 // that the extraction time exceeds the maximum total time for the feature
469 // extractor. Extraction should fail. Note that this assumes
470 // kMaxTotalTimeMs = 500.
471 EXPECT_CALL(clock_, Now())
472 // Time check at the start of extraction.
473 .WillOnce(Return(now))
474 // Time check at the start of the first chunk of work.
475 .WillOnce(Return(now))
476 // Time check after the first 10 elements.
477 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(300)))
478 // Time check at the start of the second chunk of work.
479 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(350)))
480 // Time check after resuming iteration for the second chunk.
481 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(360)))
482 // Time check after the next 10 elements. This is over the limit.
483 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(600)))
484 // A final time check for the histograms.
485 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(620)));
487 features.Clear();
488 EXPECT_FALSE(ExtractFeatures(&features));
491 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, SubframeRemoval) {
492 // In this test, we'll advance the feature extractor so that it is positioned
493 // inside an iframe, and have it pause due to exceeding the chunk time limit.
494 // Then, prior to continuation, the iframe is removed from the document.
495 // As currently implemented, this should finish extraction from the removed
496 // iframe document.
497 responses_[GetURL("host.com", "frame.html").spec()] =
498 "<html><body><p><p><p><input type=password></body></html>";
500 base::TimeTicks now = base::TimeTicks::Now();
501 EXPECT_CALL(clock_, Now())
502 // Time check at the start of extraction.
503 .WillOnce(Return(now))
504 // Time check at the start of the first chunk of work.
505 .WillOnce(Return(now))
506 // Time check after the first 10 elements. Enough time has passed
507 // to stop extraction. Schedule the iframe removal to happen as soon as
508 // the feature extractor returns control to the message loop.
509 .WillOnce(DoAll(
510 Invoke(this, &PhishingDOMFeatureExtractorTest::ScheduleRemoveIframe),
511 Return(now + base::TimeDelta::FromMilliseconds(21))))
512 // Time check at the start of the second chunk of work.
513 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(25)))
514 // Time check after resuming iteration for the second chunk.
515 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(27)))
516 // A final time check for the histograms.
517 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(33)));
519 FeatureMap expected_features;
520 expected_features.AddBooleanFeature(features::kPageHasForms);
521 expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
523 FeatureMap features;
524 LoadHtml(
525 "host.com",
526 "<html><head></head><body>"
527 "<iframe src=\"frame.html\" id=\"frame1\"></iframe>"
528 "<form></form></body></html>");
529 ASSERT_TRUE(ExtractFeatures(&features));
530 ExpectFeatureMapsAreEqual(features, expected_features);
533 } // namespace safe_browsing