Don't preload rarely seen large images
[chromium-blink-merge.git] / components / dom_distiller / content / distiller_page_web_contents_browsertest.cc
blob4228add711e33115c5239cbdb58f36b1ce4b342f
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/memory/weak_ptr.h"
6 #include "base/path_service.h"
7 #include "base/run_loop.h"
8 #include "base/strings/utf_string_conversions.h"
9 #include "base/values.h"
10 #include "components/dom_distiller/content/distiller_javascript_utils.h"
11 #include "components/dom_distiller/content/distiller_page_web_contents.h"
12 #include "components/dom_distiller/content/web_contents_main_frame_observer.h"
13 #include "components/dom_distiller/core/distiller_page.h"
14 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
15 #include "components/dom_distiller/core/proto/distilled_page.pb.h"
16 #include "components/dom_distiller/core/viewer.h"
17 #include "content/public/browser/browser_context.h"
18 #include "content/public/browser/navigation_controller.h"
19 #include "content/public/browser/render_frame_host.h"
20 #include "content/public/browser/web_contents_observer.h"
21 #include "content/public/common/isolated_world_ids.h"
22 #include "content/public/test/content_browser_test.h"
23 #include "content/shell/browser/shell.h"
24 #include "grit/components_strings.h"
25 #include "net/test/embedded_test_server/embedded_test_server.h"
26 #include "testing/gmock/include/gmock/gmock.h"
27 #include "third_party/dom_distiller_js/dom_distiller.pb.h"
28 #include "ui/base/l10n/l10n_util.h"
29 #include "ui/base/resource/resource_bundle.h"
31 using content::ContentBrowserTest;
32 using testing::ContainsRegex;
33 using testing::HasSubstr;
34 using testing::Not;
36 namespace {
38 // Helper class to know how far in the loading process the current WebContents
39 // has come. It will call the callback either after
40 // DidCommitProvisionalLoadForFrame or DocumentLoadedInFrame is called for the
41 // main frame, based on the value of |wait_for_document_loaded|.
42 class WebContentsMainFrameHelper : public content::WebContentsObserver {
43 public:
44 WebContentsMainFrameHelper(content::WebContents* web_contents,
45 const base::Closure& callback,
46 bool wait_for_document_loaded)
47 : WebContentsObserver(web_contents),
48 callback_(callback),
49 wait_for_document_loaded_(wait_for_document_loaded) {}
51 void DidCommitProvisionalLoadForFrame(
52 content::RenderFrameHost* render_frame_host,
53 const GURL& url,
54 ui::PageTransition transition_type) override {
55 if (wait_for_document_loaded_)
56 return;
57 if (!render_frame_host->GetParent())
58 callback_.Run();
61 void DocumentLoadedInFrame(
62 content::RenderFrameHost* render_frame_host) override {
63 if (wait_for_document_loaded_) {
64 if (!render_frame_host->GetParent())
65 callback_.Run();
69 private:
70 base::Closure callback_;
71 bool wait_for_document_loaded_;
74 } // namespace
76 namespace dom_distiller {
// Paths, relative to the embedded test server root, of the article fixtures
// used by the tests below. Declared as const arrays rather than `const char*`
// so the constants are immutable and have internal linkage.
const char kSimpleArticlePath[] = "/simple_article.html";
const char kVideoArticlePath[] = "/video_article.html";
81 class DistillerPageWebContentsTest : public ContentBrowserTest {
82 public:
83 // ContentBrowserTest:
84 void SetUpOnMainThread() override {
85 if (!DistillerJavaScriptWorldIdIsSet()) {
86 SetDistillerJavaScriptWorldId(content::ISOLATED_WORLD_ID_CONTENT_END);
88 AddComponentsResources();
89 SetUpTestServer();
90 ContentBrowserTest::SetUpOnMainThread();
93 void DistillPage(const base::Closure& quit_closure, const std::string& url) {
94 quit_closure_ = quit_closure;
95 distiller_page_->DistillPage(
96 embedded_test_server()->GetURL(url),
97 dom_distiller::proto::DomDistillerOptions(),
98 base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished,
99 this));
102 void OnPageDistillationFinished(
103 scoped_ptr<proto::DomDistillerResult> distiller_result,
104 bool distillation_successful) {
105 distiller_result_ = distiller_result.Pass();
106 quit_closure_.Run();
109 void OnJsExecutionDone(base::Closure callback, const base::Value* value) {
110 js_result_.reset(value->DeepCopy());
111 callback.Run();
114 private:
115 void AddComponentsResources() {
116 base::FilePath pak_file;
117 base::FilePath pak_dir;
118 #if defined(OS_ANDROID)
119 CHECK(PathService::Get(base::DIR_ANDROID_APP_DATA, &pak_dir));
120 pak_dir = pak_dir.Append(FILE_PATH_LITERAL("paks"));
121 #else
122 PathService::Get(base::DIR_MODULE, &pak_dir);
123 #endif // OS_ANDROID
124 pak_file =
125 pak_dir.Append(FILE_PATH_LITERAL("components_tests_resources.pak"));
126 ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
127 pak_file, ui::SCALE_FACTOR_NONE);
130 void SetUpTestServer() {
131 base::FilePath path;
132 PathService::Get(base::DIR_SOURCE_ROOT, &path);
133 embedded_test_server()->ServeFilesFromDirectory(
134 path.AppendASCII("components/test/data/dom_distiller"));
135 embedded_test_server()->ServeFilesFromDirectory(
136 path.AppendASCII("components/dom_distiller/core/javascript"));
137 ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
140 protected:
141 void RunUseCurrentWebContentsTest(const std::string& url,
142 bool expect_new_web_contents,
143 bool setup_main_frame_observer,
144 bool wait_for_document_loaded);
146 DistillerPageWebContents* distiller_page_;
147 base::Closure quit_closure_;
148 scoped_ptr<proto::DomDistillerResult> distiller_result_;
149 scoped_ptr<base::Value> js_result_;
152 // Use this class to be able to leak the WebContents, which is needed for when
153 // the current WebContents is used for distillation.
154 class TestDistillerPageWebContents : public DistillerPageWebContents {
155 public:
156 TestDistillerPageWebContents(
157 content::BrowserContext* browser_context,
158 const gfx::Size& render_view_size,
159 scoped_ptr<SourcePageHandleWebContents> optional_web_contents_handle,
160 bool expect_new_web_contents)
161 : DistillerPageWebContents(browser_context, render_view_size,
162 optional_web_contents_handle.Pass()),
163 expect_new_web_contents_(expect_new_web_contents),
164 new_web_contents_created_(false) {}
166 void CreateNewWebContents(const GURL& url) override {
167 ASSERT_EQ(true, expect_new_web_contents_);
168 new_web_contents_created_ = true;
169 DistillerPageWebContents::CreateNewWebContents(url);
172 bool new_web_contents_created() { return new_web_contents_created_; }
174 private:
175 bool expect_new_web_contents_;
176 bool new_web_contents_created_;
179 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, BasicDistillationWorks) {
180 DistillerPageWebContents distiller_page(
181 shell()->web_contents()->GetBrowserContext(),
182 shell()->web_contents()->GetContainerBounds().size(),
183 scoped_ptr<SourcePageHandleWebContents>());
184 distiller_page_ = &distiller_page;
186 base::RunLoop run_loop;
187 DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
188 run_loop.Run();
190 EXPECT_EQ("Test Page Title", distiller_result_->title());
191 EXPECT_THAT(distiller_result_->distilled_content().html(),
192 HasSubstr("Lorem ipsum"));
193 EXPECT_THAT(distiller_result_->distilled_content().html(),
194 Not(HasSubstr("questionable content")));
195 EXPECT_EQ("", distiller_result_->pagination_info().next_page());
196 EXPECT_EQ("", distiller_result_->pagination_info().prev_page());
199 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeLinks) {
200 DistillerPageWebContents distiller_page(
201 shell()->web_contents()->GetBrowserContext(),
202 shell()->web_contents()->GetContainerBounds().size(),
203 scoped_ptr<SourcePageHandleWebContents>());
204 distiller_page_ = &distiller_page;
206 base::RunLoop run_loop;
207 DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
208 run_loop.Run();
210 // A relative link should've been updated.
211 EXPECT_THAT(distiller_result_->distilled_content().html(),
212 ContainsRegex("href=\"http://127.0.0.1:.*/relativelink.html\""));
213 EXPECT_THAT(distiller_result_->distilled_content().html(),
214 HasSubstr("href=\"http://www.google.com/absolutelink.html\""));
217 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeImages) {
218 DistillerPageWebContents distiller_page(
219 shell()->web_contents()->GetBrowserContext(),
220 shell()->web_contents()->GetContainerBounds().size(),
221 scoped_ptr<SourcePageHandleWebContents>());
222 distiller_page_ = &distiller_page;
224 base::RunLoop run_loop;
225 DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
226 run_loop.Run();
228 // A relative link should've been updated.
229 EXPECT_THAT(distiller_result_->distilled_content().html(),
230 ContainsRegex("src=\"http://127.0.0.1:.*/relativeimage.png\""));
231 EXPECT_THAT(distiller_result_->distilled_content().html(),
232 HasSubstr("src=\"http://www.google.com/absoluteimage.png\""));
236 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeVideos) {
237 DistillerPageWebContents distiller_page(
238 shell()->web_contents()->GetBrowserContext(),
239 shell()->web_contents()->GetContainerBounds().size(),
240 scoped_ptr<SourcePageHandleWebContents>());
241 distiller_page_ = &distiller_page;
243 base::RunLoop run_loop;
244 DistillPage(run_loop.QuitClosure(), kVideoArticlePath);
245 run_loop.Run();
247 // A relative source/track should've been updated.
248 EXPECT_THAT(distiller_result_->distilled_content().html(),
249 ContainsRegex("src=\"http://127.0.0.1:.*/relative_video.mp4\""));
250 EXPECT_THAT(
251 distiller_result_->distilled_content().html(),
252 ContainsRegex("src=\"http://127.0.0.1:.*/relative_track_en.vtt\""));
253 EXPECT_THAT(distiller_result_->distilled_content().html(),
254 HasSubstr("src=\"http://www.google.com/absolute_video.ogg\""));
255 EXPECT_THAT(distiller_result_->distilled_content().html(),
256 HasSubstr("src=\"http://www.google.com/absolute_track_fr.vtt\""));
259 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, VisibilityDetection) {
260 DistillerPageWebContents distiller_page(
261 shell()->web_contents()->GetBrowserContext(),
262 shell()->web_contents()->GetContainerBounds().size(),
263 scoped_ptr<SourcePageHandleWebContents>());
264 distiller_page_ = &distiller_page;
266 // visble_style.html and invisible_style.html only differ by the visibility
267 // internal stylesheet.
270 base::RunLoop run_loop;
271 DistillPage(run_loop.QuitClosure(), "/visible_style.html");
272 run_loop.Run();
273 EXPECT_THAT(distiller_result_->distilled_content().html(),
274 HasSubstr("Lorem ipsum"));
278 base::RunLoop run_loop;
279 DistillPage(run_loop.QuitClosure(), "/invisible_style.html");
280 run_loop.Run();
281 EXPECT_THAT(distiller_result_->distilled_content().html(),
282 Not(HasSubstr("Lorem ipsum")));
286 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
287 UsingCurrentWebContentsWrongUrl) {
288 std::string url("/bogus");
289 bool expect_new_web_contents = true;
290 bool setup_main_frame_observer = true;
291 bool wait_for_document_loaded = true;
292 RunUseCurrentWebContentsTest(url,
293 expect_new_web_contents,
294 setup_main_frame_observer,
295 wait_for_document_loaded);
298 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
299 UsingCurrentWebContentsNoMainFrameObserver) {
300 std::string url(kSimpleArticlePath);
301 bool expect_new_web_contents = true;
302 bool setup_main_frame_observer = false;
303 bool wait_for_document_loaded = true;
304 RunUseCurrentWebContentsTest(url,
305 expect_new_web_contents,
306 setup_main_frame_observer,
307 wait_for_document_loaded);
310 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
311 UsingCurrentWebContentsNotFinishedLoadingYet) {
312 std::string url(kSimpleArticlePath);
313 bool expect_new_web_contents = false;
314 bool setup_main_frame_observer = true;
315 bool wait_for_document_loaded = false;
316 RunUseCurrentWebContentsTest(url,
317 expect_new_web_contents,
318 setup_main_frame_observer,
319 wait_for_document_loaded);
322 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
323 UsingCurrentWebContentsReadyForDistillation) {
324 std::string url(kSimpleArticlePath);
325 bool expect_new_web_contents = false;
326 bool setup_main_frame_observer = true;
327 bool wait_for_document_loaded = true;
328 RunUseCurrentWebContentsTest(url,
329 expect_new_web_contents,
330 setup_main_frame_observer,
331 wait_for_document_loaded);
334 void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest(
335 const std::string& url,
336 bool expect_new_web_contents,
337 bool setup_main_frame_observer,
338 bool wait_for_document_loaded) {
339 content::WebContents* current_web_contents = shell()->web_contents();
340 if (setup_main_frame_observer) {
341 dom_distiller::WebContentsMainFrameObserver::CreateForWebContents(
342 current_web_contents);
344 base::RunLoop url_loaded_runner;
345 WebContentsMainFrameHelper main_frame_loaded(current_web_contents,
346 url_loaded_runner.QuitClosure(),
347 wait_for_document_loaded);
348 current_web_contents->GetController().LoadURL(
349 embedded_test_server()->GetURL(url),
350 content::Referrer(),
351 ui::PAGE_TRANSITION_TYPED,
352 std::string());
353 url_loaded_runner.Run();
355 scoped_ptr<SourcePageHandleWebContents> source_page_handle(
356 new SourcePageHandleWebContents(current_web_contents, false));
358 TestDistillerPageWebContents distiller_page(
359 shell()->web_contents()->GetBrowserContext(),
360 shell()->web_contents()->GetContainerBounds().size(),
361 source_page_handle.Pass(),
362 expect_new_web_contents);
363 distiller_page_ = &distiller_page;
365 base::RunLoop run_loop;
366 DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
367 run_loop.Run();
369 // Sanity check of distillation process.
370 EXPECT_EQ(expect_new_web_contents, distiller_page.new_web_contents_created());
371 EXPECT_EQ("Test Page Title", distiller_result_->title());
374 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
375 PageDestroyedBeforeFinishDistillation) {
377 content::WebContents* current_web_contents = shell()->web_contents();
379 dom_distiller::WebContentsMainFrameObserver::CreateForWebContents(
380 current_web_contents);
382 base::RunLoop url_loaded_runner;
383 WebContentsMainFrameHelper main_frame_loaded(current_web_contents,
384 url_loaded_runner.QuitClosure(),
385 true);
386 current_web_contents->GetController().LoadURL(
387 embedded_test_server()->GetURL(kSimpleArticlePath),
388 content::Referrer(),
389 ui::PAGE_TRANSITION_TYPED,
390 std::string());
391 url_loaded_runner.Run();
393 scoped_ptr<SourcePageHandleWebContents> source_page_handle(
394 new SourcePageHandleWebContents(current_web_contents, false));
396 TestDistillerPageWebContents* distiller_page(
397 new TestDistillerPageWebContents(
398 current_web_contents->GetBrowserContext(),
399 current_web_contents->GetContainerBounds().size(),
400 source_page_handle.Pass(),
401 false));
402 distiller_page_ = distiller_page;
404 base::RunLoop run_loop;
405 DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
407 // It can not crash the loop when returning the result.
408 delete distiller_page_;
410 // Make sure the test ends when it does not crash.
411 base::MessageLoop::current()->PostDelayedTask(
412 FROM_HERE, run_loop.QuitClosure(), base::TimeDelta::FromSeconds(2));
414 run_loop.Run();
417 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, MarkupInfo) {
418 DistillerPageWebContents distiller_page(
419 shell()->web_contents()->GetBrowserContext(),
420 shell()->web_contents()->GetContainerBounds().size(),
421 scoped_ptr<SourcePageHandleWebContents>());
422 distiller_page_ = &distiller_page;
424 base::RunLoop run_loop;
425 DistillPage(run_loop.QuitClosure(), "/markup_article.html");
426 run_loop.Run();
428 EXPECT_THAT(distiller_result_->distilled_content().html(),
429 HasSubstr("Lorem ipsum"));
430 EXPECT_EQ("Marked-up Markup Test Page Title", distiller_result_->title());
432 const proto::MarkupInfo markup_info = distiller_result_->markup_info();
433 EXPECT_EQ("Marked-up Markup Test Page Title", markup_info.title());
434 EXPECT_EQ("Article", markup_info.type());
435 EXPECT_EQ("http://test/markup.html", markup_info.url());
436 EXPECT_EQ("This page tests Markup Info.", markup_info.description());
437 EXPECT_EQ("Whoever Published", markup_info.publisher());
438 EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", markup_info.copyright());
439 EXPECT_EQ("Whoever Authored", markup_info.author());
441 const proto::MarkupArticle markup_article = markup_info.article();
442 EXPECT_EQ("Whatever Section", markup_article.section());
443 EXPECT_EQ("July 23, 2014", markup_article.published_time());
444 EXPECT_EQ("2014-07-23T23:59", markup_article.modified_time());
445 EXPECT_EQ("", markup_article.expiration_time());
446 ASSERT_EQ(1, markup_article.authors_size());
447 EXPECT_EQ("Whoever Authored", markup_article.authors(0));
449 ASSERT_EQ(2, markup_info.images_size());
451 const proto::MarkupImage markup_image1 = markup_info.images(0);
452 EXPECT_EQ("http://test/markup1.jpeg", markup_image1.url());
453 EXPECT_EQ("https://test/markup1.jpeg", markup_image1.secure_url());
454 EXPECT_EQ("jpeg", markup_image1.type());
455 EXPECT_EQ("", markup_image1.caption());
456 EXPECT_EQ(600, markup_image1.width());
457 EXPECT_EQ(400, markup_image1.height());
459 const proto::MarkupImage markup_image2 = markup_info.images(1);
460 EXPECT_EQ("http://test/markup2.gif", markup_image2.url());
461 EXPECT_EQ("https://test/markup2.gif", markup_image2.secure_url());
462 EXPECT_EQ("gif", markup_image2.type());
463 EXPECT_EQ("", markup_image2.caption());
464 EXPECT_EQ(1000, markup_image2.width());
465 EXPECT_EQ(600, markup_image2.height());
468 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
469 TestNoContentDoesNotCrash) {
470 const std::string no_content =
471 l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_CONTENT);
473 { // Test zero pages.
474 scoped_ptr<DistilledArticleProto> article_proto(
475 new DistilledArticleProto());
476 std::string js = viewer::GetUnsafeArticleContentJs(article_proto.get());
477 EXPECT_THAT(js, HasSubstr(no_content));
480 { // Test empty content.
481 scoped_ptr<DistilledArticleProto> article_proto(
482 new DistilledArticleProto());
483 (*(article_proto->add_pages())).set_html("");
484 std::string js = viewer::GetUnsafeArticleContentJs(article_proto.get());
485 EXPECT_THAT(js, HasSubstr(no_content));
489 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
490 TestPinch) {
491 // Load the test file in content shell and wait until it has fully loaded.
492 content::WebContents* web_contents = shell()->web_contents();
493 dom_distiller::WebContentsMainFrameObserver::CreateForWebContents(
494 web_contents);
495 base::RunLoop url_loaded_runner;
496 WebContentsMainFrameHelper main_frame_loaded(web_contents,
497 url_loaded_runner.QuitClosure(),
498 true);
499 web_contents->GetController().LoadURL(
500 embedded_test_server()->GetURL("/pinch_tester.html"),
501 content::Referrer(),
502 ui::PAGE_TRANSITION_TYPED,
503 std::string());
504 url_loaded_runner.Run();
506 // Execute the JS to run the tests, and wait until it has finished.
507 base::RunLoop run_loop;
508 web_contents->GetMainFrame()->ExecuteJavaScript(
509 base::UTF8ToUTF16("(function() {return pinchtest.run();})();"),
510 base::Bind(&DistillerPageWebContentsTest::OnJsExecutionDone,
511 base::Unretained(this), run_loop.QuitClosure()));
512 run_loop.Run();
514 // Convert to dictionary and parse the results.
515 const base::DictionaryValue* dict;
516 ASSERT_TRUE(js_result_);
517 ASSERT_TRUE(js_result_->GetAsDictionary(&dict));
519 ASSERT_TRUE(dict->HasKey("success"));
520 bool success;
521 ASSERT_TRUE(dict->GetBoolean("success", &success));
522 EXPECT_TRUE(success);
525 } // namespace dom_distiller