Adding instrumentation to locate the source of jankiness
[chromium-blink-merge.git] / chrome / renderer / safe_browsing / phishing_dom_feature_extractor_browsertest.cc
blob129b6f208e739c953a2214f79e99902ef302ecac
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Note that although this is not a "browser" test, it runs as part of
6 // browser_tests. This is because WebKit does not work properly if it is
7 // shutdown and re-initialized. Since browser_tests runs each test in a
8 // new process, this avoids the problem.
10 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
12 #include "base/bind.h"
13 #include "base/callback.h"
14 #include "base/command_line.h"
15 #include "base/compiler_specific.h"
16 #include "base/memory/weak_ptr.h"
17 #include "base/message_loop/message_loop.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/time/time.h"
20 #include "chrome/browser/ui/browser.h"
21 #include "chrome/browser/ui/tabs/tab_strip_model.h"
22 #include "chrome/common/chrome_switches.h"
23 #include "chrome/renderer/safe_browsing/features.h"
24 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
25 #include "chrome/renderer/safe_browsing/test_utils.h"
26 #include "chrome/test/base/in_process_browser_test.h"
27 #include "chrome/test/base/ui_test_utils.h"
28 #include "content/public/browser/interstitial_page.h"
29 #include "content/public/browser/web_contents.h"
30 #include "content/public/renderer/render_view.h"
31 #include "content/public/test/browser_test_utils.h"
32 #include "content/public/test/routing_id_mangling_disabler.h"
33 #include "content/public/test/test_utils.h"
34 #include "net/dns/mock_host_resolver.h"
35 #include "net/test/embedded_test_server/embedded_test_server.h"
36 #include "net/test/embedded_test_server/http_request.h"
37 #include "net/test/embedded_test_server/http_response.h"
38 #include "testing/gmock/include/gmock/gmock.h"
39 #include "third_party/WebKit/public/platform/WebString.h"
40 #include "third_party/WebKit/public/web/WebFrame.h"
41 #include "third_party/WebKit/public/web/WebScriptSource.h"
42 #include "third_party/WebKit/public/web/WebView.h"
44 using ::testing::DoAll;
45 using ::testing::Invoke;
46 using ::testing::Return;
48 namespace {
// Routing ID that the tests in this file pass to
// content::RenderView::FromRoutingID() to locate the RenderView under test.
// NOTE(review): presumably this value is only stable because the fixture
// holds a content::RoutingIDManglingDisabler -- confirm.
50 // The first RenderFrame is routing ID 1, and the first RenderView is 2.
51 const int kRenderViewRoutingId = 2;
55 namespace safe_browsing {
57 class PhishingDOMFeatureExtractorTest : public InProcessBrowserTest {
58 public:
59 content::WebContents* GetWebContents() {
60 return browser()->tab_strip_model()->GetActiveWebContents();
63 // Helper for the SubframeRemoval test that posts a message to remove
64 // the iframe "frame1" from the document.
65 void ScheduleRemoveIframe() {
66 base::MessageLoop::current()->PostTask(
67 FROM_HERE,
68 base::Bind(&PhishingDOMFeatureExtractorTest::RemoveIframe,
69 weak_factory_.GetWeakPtr()));
72 protected:
73 PhishingDOMFeatureExtractorTest() : weak_factory_(this) {}
75 virtual ~PhishingDOMFeatureExtractorTest() {}
77 virtual void SetUpCommandLine(CommandLine* command_line) override {
78 command_line->AppendSwitch(switches::kSingleProcess);
79 #if defined(OS_WIN)
80 // Don't want to try to create a GPU process.
81 command_line->AppendSwitch(switches::kDisableGpu);
82 #endif
85 virtual void SetUpOnMainThread() override {
86 extractor_.reset(new PhishingDOMFeatureExtractor(
87 content::RenderView::FromRoutingID(kRenderViewRoutingId), &clock_));
89 ASSERT_TRUE(StartTestServer());
90 host_resolver()->AddRule("*", "127.0.0.1");
93 // Runs the DOMFeatureExtractor on the RenderView, waiting for the
94 // completion callback. Returns the success boolean from the callback.
95 bool ExtractFeatures(FeatureMap* features) {
96 success_ = false;
97 PostTaskToInProcessRendererAndWait(
98 base::Bind(&PhishingDOMFeatureExtractorTest::ExtractFeaturesInternal,
99 base::Unretained(this),
100 features));
101 return success_;
104 void ExtractFeaturesInternal(FeatureMap* features) {
105 scoped_refptr<content::MessageLoopRunner> message_loop =
106 new content::MessageLoopRunner;
107 extractor_->ExtractFeatures(
108 features,
109 base::Bind(&PhishingDOMFeatureExtractorTest::ExtractionDone,
110 base::Unretained(this),
111 message_loop->QuitClosure()));
112 message_loop->Run();
115 // Completion callback for feature extraction.
116 void ExtractionDone(const base::Closure& quit_closure,
117 bool success) {
118 success_ = success;
119 quit_closure.Run();
122 // Does the actual work of removing the iframe "frame1" from the document.
123 void RemoveIframe() {
124 content::RenderView* render_view =
125 content::RenderView::FromRoutingID(kRenderViewRoutingId);
126 blink::WebFrame* main_frame = render_view->GetWebView()->mainFrame();
127 ASSERT_TRUE(main_frame);
128 main_frame->executeScript(
129 blink::WebString(
130 "document.body.removeChild(document.getElementById('frame1'));"));
133 bool StartTestServer() {
134 CHECK(!embedded_test_server_);
135 embedded_test_server_.reset(new net::test_server::EmbeddedTestServer());
136 embedded_test_server_->RegisterRequestHandler(
137 base::Bind(&PhishingDOMFeatureExtractorTest::HandleRequest,
138 base::Unretained(this)));
139 return embedded_test_server_->InitializeAndWaitUntilReady();
142 scoped_ptr<net::test_server::HttpResponse> HandleRequest(
143 const net::test_server::HttpRequest& request) {
144 std::map<std::string, std::string>::const_iterator host_it =
145 request.headers.find("Host");
146 if (host_it == request.headers.end())
147 return scoped_ptr<net::test_server::HttpResponse>();
149 std::string url =
150 std::string("http://") + host_it->second + request.relative_url;
151 std::map<std::string, std::string>::const_iterator it =
152 responses_.find(url);
153 if (it == responses_.end())
154 return scoped_ptr<net::test_server::HttpResponse>();
156 scoped_ptr<net::test_server::BasicHttpResponse> http_response(
157 new net::test_server::BasicHttpResponse());
158 http_response->set_code(net::HTTP_OK);
159 http_response->set_content_type("text/html");
160 http_response->set_content(it->second);
161 return http_response.PassAs<net::test_server::HttpResponse>();
164 GURL GetURL(const std::string& host, const std::string& path) {
165 GURL::Replacements replace;
166 replace.SetHostStr(host);
167 replace.SetPathStr(path);
168 return embedded_test_server_->base_url().ReplaceComponents(replace);
171 // Returns the URL that was loaded.
172 GURL LoadHtml(const std::string& host, const std::string& content) {
173 GURL url(GetURL(host, ""));
174 responses_[url.spec()] = content;
175 ui_test_utils::NavigateToURL(browser(), url);
176 return url;
179 // Map of url -> response body for network requests from the renderer.
180 // Any urls not in this map are served a 404 error.
181 std::map<std::string, std::string> responses_;
183 content::RoutingIDManglingDisabler mangling_disabler_;
184 scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_;
185 MockFeatureExtractorClock clock_;
186 scoped_ptr<PhishingDOMFeatureExtractor> extractor_;
187 bool success_; // holds the success value from ExtractFeatures
188 base::WeakPtrFactory<PhishingDOMFeatureExtractorTest> weak_factory_;
191 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) {
192 // This test doesn't exercise the extraction timing.
193 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
195 FeatureMap expected_features;
196 expected_features.AddBooleanFeature(features::kPageHasForms);
197 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.25);
198 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
199 expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
201 FeatureMap features;
202 LoadHtml(
203 "host.com",
204 "<html><head><body>"
205 "<form action=\"query\"><input type=text><input type=checkbox></form>"
206 "<form action=\"http://cgi.host.com/submit\"></form>"
207 "<form action=\"http://other.com/\"></form>"
208 "<form action=\"query\"></form>"
209 "<form></form></body></html>");
210 ASSERT_TRUE(ExtractFeatures(&features));
211 ExpectFeatureMapsAreEqual(features, expected_features);
213 expected_features.Clear();
214 expected_features.AddBooleanFeature(features::kPageHasRadioInputs);
215 expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
217 features.Clear();
218 LoadHtml(
219 "host.com",
220 "<html><head><body>"
221 "<input type=\"radio\"><input type=password></body></html>");
222 ASSERT_TRUE(ExtractFeatures(&features));
223 ExpectFeatureMapsAreEqual(features, expected_features);
225 expected_features.Clear();
226 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
228 features.Clear();
229 LoadHtml(
230 "host.com",
231 "<html><head><body><input></body></html>");
232 ASSERT_TRUE(ExtractFeatures(&features));
233 ExpectFeatureMapsAreEqual(features, expected_features);
235 expected_features.Clear();
236 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
238 features.Clear();
239 LoadHtml(
240 "host.com",
241 "<html><head><body><input type=\"invalid\"></body></html>");
242 ASSERT_TRUE(ExtractFeatures(&features));
243 ExpectFeatureMapsAreEqual(features, expected_features);
246 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) {
247 // This test doesn't exercise the extraction timing.
248 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
250 FeatureMap expected_features;
251 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.5);
252 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.0);
253 expected_features.AddBooleanFeature(features::kPageLinkDomain +
254 std::string("chromium.org"));
256 FeatureMap features;
257 LoadHtml(
258 "www.host.com",
259 "<html><head><body>"
260 "<a href=\"http://www2.host.com/abc\">link</a>"
261 "<a name=page_anchor></a>"
262 "<a href=\"http://www.chromium.org/\">chromium</a>"
263 "</body></html");
264 ASSERT_TRUE(ExtractFeatures(&features));
265 ExpectFeatureMapsAreEqual(features, expected_features);
267 expected_features.Clear();
268 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
269 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.5);
270 expected_features.AddBooleanFeature(features::kPageLinkDomain +
271 std::string("chromium.org"));
273 net::SpawnedTestServer https_server(
274 net::SpawnedTestServer::TYPE_HTTPS,
275 net::SpawnedTestServer::kLocalhost,
276 base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
277 ASSERT_TRUE(https_server.Start());
279 // The PhishingDOMFeatureExtractor depends on URLs being domains and not IPs,
280 // so use a domain.
281 std::string url_str = "https://host.com:";
282 url_str += base::IntToString(https_server.host_port_pair().port());
283 url_str += "/files/safe_browsing/secure_link_features.html";
284 ui_test_utils::NavigateToURL(browser(), GURL(url_str));
286 // Click through the certificate error interstitial.
287 content::InterstitialPage* interstitial_page =
288 GetWebContents()->GetInterstitialPage();
289 interstitial_page->Proceed();
290 content::WaitForLoadStop(GetWebContents());
292 features.Clear();
293 ASSERT_TRUE(ExtractFeatures(&features));
294 ExpectFeatureMapsAreEqual(features, expected_features);
297 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest,
298 ScriptAndImageFeatures) {
299 // This test doesn't exercise the extraction timing.
300 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
302 FeatureMap expected_features;
303 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
305 FeatureMap features;
306 LoadHtml(
307 "host.com",
308 "<html><head><script></script><script></script></head></html>");
309 ASSERT_TRUE(ExtractFeatures(&features));
310 ExpectFeatureMapsAreEqual(features, expected_features);
312 expected_features.Clear();
313 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
314 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTSix);
315 expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 0.5);
317 features.Clear();
318 net::SpawnedTestServer https_server(
319 net::SpawnedTestServer::TYPE_HTTPS,
320 net::SpawnedTestServer::kLocalhost,
321 base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
322 ASSERT_TRUE(https_server.Start());
324 // The PhishingDOMFeatureExtractor depends on URLs being domains and not IPs,
325 // so use a domain.
326 std::string url_str = "https://host.com:";
327 url_str += base::IntToString(https_server.host_port_pair().port());
328 url_str += "/files/safe_browsing/secure_script_and_image.html";
329 ui_test_utils::NavigateToURL(browser(), GURL(url_str));
331 // Click through the certificate error interstitial.
332 content::InterstitialPage* interstitial_page =
333 GetWebContents()->GetInterstitialPage();
334 interstitial_page->Proceed();
335 content::WaitForLoadStop(GetWebContents());
337 ASSERT_TRUE(ExtractFeatures(&features));
338 ExpectFeatureMapsAreEqual(features, expected_features);
341 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) {
342 // This test doesn't exercise the extraction timing.
343 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
345 // Test that features are aggregated across all frames.
347 std::string port = base::IntToString(embedded_test_server_->port());
348 responses_[GetURL("host2.com", "").spec()] =
349 "<html><head><script></script><body>"
350 "<form action=\"http://host4.com/\"><input type=checkbox></form>"
351 "<form action=\"http://host2.com/submit\"></form>"
352 "<a href=\"http://www.host2.com/home\">link</a>"
353 "<iframe src=\"nested.html\"></iframe>"
354 "<body></html>";
356 responses_[GetURL("host2.com", "nested.html").spec()] =
357 "<html><body><input type=password>"
358 "<a href=\"https://host4.com/\">link</a>"
359 "<a href=\"relative\">another</a>"
360 "</body></html>";
362 responses_[GetURL("host3.com", "").spec()] =
363 "<html><head><script></script><body>"
364 "<img src=\"http://host.com/123.png\">"
365 "</body></html>";
367 FeatureMap expected_features;
368 expected_features.AddBooleanFeature(features::kPageHasForms);
369 // Form action domains are compared to the URL of the document they're in,
370 // not the URL of the toplevel page. So http://host2.com/ has two form
371 // actions, one of which is external.
372 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
373 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
374 expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
375 expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
376 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
377 expected_features.AddBooleanFeature(features::kPageLinkDomain +
378 std::string("host4.com"));
379 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.25);
380 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
381 expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 1.0);
383 FeatureMap features;
384 std::string html(
385 "<html><body><input type=text><a href=\"info.html\">link</a>"
386 "<iframe src=\"http://host2.com:");
387 html += port;
388 html += std::string(
389 "/\"></iframe>"
390 "<iframe src=\"http://host3.com:");
391 html += port;
392 html += std::string("/\"></iframe></body></html>");
394 LoadHtml("host.com", html);
395 ASSERT_TRUE(ExtractFeatures(&features));
396 ExpectFeatureMapsAreEqual(features, expected_features);
399 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, Continuation) {
400 // For this test, we'll cause the feature extraction to run multiple
401 // iterations by incrementing the clock.
403 // This page has a total of 50 elements. For the external forms feature to
404 // be computed correctly, the extractor has to examine the whole document.
405 // Note: the empty HEAD is important -- WebKit will synthesize a HEAD if
406 // there isn't one present, which can be confusing for the element counts.
407 std::string response = "<html><head></head><body>"
408 "<form action=\"ondomain\"></form>";
409 for (int i = 0; i < 45; ++i) {
410 response.append("<p>");
412 response.append("<form action=\"http://host2.com/\"></form></body></html>");
414 // Advance the clock 6 ms every 10 elements processed, 10 ms between chunks.
415 // Note that this assumes kClockCheckGranularity = 10 and
416 // kMaxTimePerChunkMs = 10.
417 base::TimeTicks now = base::TimeTicks::Now();
418 EXPECT_CALL(clock_, Now())
419 // Time check at the start of extraction.
420 .WillOnce(Return(now))
421 // Time check at the start of the first chunk of work.
422 .WillOnce(Return(now))
423 // Time check after the first 10 elements.
424 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(6)))
425 // Time check after the next 10 elements. This is over the chunk
426 // time limit, so a continuation task will be posted.
427 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(12)))
428 // Time check at the start of the second chunk of work.
429 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(22)))
430 // Time check after resuming iteration for the second chunk.
431 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(24)))
432 // Time check after the next 10 elements.
433 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(30)))
434 // Time check after the next 10 elements. This will trigger another
435 // continuation task.
436 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(36)))
437 // Time check at the start of the third chunk of work.
438 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(46)))
439 // Time check after resuming iteration for the third chunk.
440 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(48)))
441 // Time check after the last 10 elements.
442 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(54)))
443 // A final time check for the histograms.
444 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(56)));
446 FeatureMap expected_features;
447 expected_features.AddBooleanFeature(features::kPageHasForms);
448 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
450 FeatureMap features;
451 LoadHtml("host.com", response);
452 ASSERT_TRUE(ExtractFeatures(&features));
453 ExpectFeatureMapsAreEqual(features, expected_features);
454 // Make sure none of the mock expectations carry over to the next test.
455 ::testing::Mock::VerifyAndClearExpectations(&clock_);
457 // Now repeat the test with the same page, but advance the clock faster so
458 // that the extraction time exceeds the maximum total time for the feature
459 // extractor. Extraction should fail. Note that this assumes
460 // kMaxTotalTimeMs = 500.
461 EXPECT_CALL(clock_, Now())
462 // Time check at the start of extraction.
463 .WillOnce(Return(now))
464 // Time check at the start of the first chunk of work.
465 .WillOnce(Return(now))
466 // Time check after the first 10 elements.
467 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(300)))
468 // Time check at the start of the second chunk of work.
469 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(350)))
470 // Time check after resuming iteration for the second chunk.
471 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(360)))
472 // Time check after the next 10 elements. This is over the limit.
473 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(600)))
474 // A final time check for the histograms.
475 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(620)));
477 features.Clear();
478 EXPECT_FALSE(ExtractFeatures(&features));
481 IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, SubframeRemoval) {
482 // In this test, we'll advance the feature extractor so that it is positioned
483 // inside an iframe, and have it pause due to exceeding the chunk time limit.
484 // Then, prior to continuation, the iframe is removed from the document.
485 // As currently implemented, this should finish extraction from the removed
486 // iframe document.
487 responses_[GetURL("host.com", "frame.html").spec()] =
488 "<html><body><p><p><p><input type=password></body></html>";
490 base::TimeTicks now = base::TimeTicks::Now();
491 EXPECT_CALL(clock_, Now())
492 // Time check at the start of extraction.
493 .WillOnce(Return(now))
494 // Time check at the start of the first chunk of work.
495 .WillOnce(Return(now))
496 // Time check after the first 10 elements. Enough time has passed
497 // to stop extraction. Schedule the iframe removal to happen as soon as
498 // the feature extractor returns control to the message loop.
499 .WillOnce(DoAll(
500 Invoke(this, &PhishingDOMFeatureExtractorTest::ScheduleRemoveIframe),
501 Return(now + base::TimeDelta::FromMilliseconds(21))))
502 // Time check at the start of the second chunk of work.
503 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(25)))
504 // Time check after resuming iteration for the second chunk.
505 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(27)))
506 // A final time check for the histograms.
507 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(33)));
509 FeatureMap expected_features;
510 expected_features.AddBooleanFeature(features::kPageHasForms);
511 expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
513 FeatureMap features;
514 LoadHtml(
515 "host.com",
516 "<html><head></head><body>"
517 "<iframe src=\"frame.html\" id=\"frame1\"></iframe>"
518 "<form></form></body></html>");
519 ASSERT_TRUE(ExtractFeatures(&features));
520 ExpectFeatureMapsAreEqual(features, expected_features);
523 } // namespace safe_browsing