1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/memory/weak_ptr.h"
6 #include "base/path_service.h"
7 #include "base/run_loop.h"
8 #include "base/strings/utf_string_conversions.h"
9 #include "base/values.h"
10 #include "components/dom_distiller/content/distiller_page_web_contents.h"
11 #include "components/dom_distiller/content/web_contents_main_frame_observer.h"
12 #include "components/dom_distiller/core/distiller_page.h"
13 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
14 #include "components/dom_distiller/core/proto/distilled_page.pb.h"
15 #include "components/dom_distiller/core/viewer.h"
16 #include "content/public/browser/browser_context.h"
17 #include "content/public/browser/navigation_controller.h"
18 #include "content/public/browser/render_frame_host.h"
19 #include "content/public/browser/web_contents_observer.h"
20 #include "content/public/test/content_browser_test.h"
21 #include "content/shell/browser/shell.h"
22 #include "grit/components_strings.h"
23 #include "net/test/embedded_test_server/embedded_test_server.h"
24 #include "testing/gmock/include/gmock/gmock.h"
25 #include "third_party/dom_distiller_js/dom_distiller.pb.h"
26 #include "ui/base/l10n/l10n_util.h"
27 #include "ui/base/resource/resource_bundle.h"
29 using content::ContentBrowserTest
;
30 using testing::ContainsRegex
;
31 using testing::HasSubstr
;
36 // Helper class to know how far in the loading process the current WebContents
37 // has come. It will call the callback either after
38 // DidCommitProvisionalLoadForFrame or DocumentLoadedInFrame is called for the
39 // main frame, based on the value of |wait_for_document_loaded|.
// The helper registers itself as a content::WebContentsObserver on the
// WebContents handed to its constructor.
40 class WebContentsMainFrameHelper
: public content::WebContentsObserver
{
// |web_contents| is the WebContents to observe.
// |callback| is the closure associated with the awaited load event.
// |wait_for_document_loaded| selects which of the two observer events below
// is the awaited one; it is stored in |wait_for_document_loaded_|.
// NOTE(review): the initializer for |callback_| is not visible in this view.
42 WebContentsMainFrameHelper(content::WebContents
* web_contents
,
43 const base::Closure
& callback
,
44 bool wait_for_document_loaded
)
45 : WebContentsObserver(web_contents
),
47 wait_for_document_loaded_(wait_for_document_loaded
) {}
// content::WebContentsObserver override for the commit event. It first checks
// |wait_for_document_loaded_| and then whether |render_frame_host| has no
// parent frame.
// NOTE(review): the statements guarded by these conditions are not visible
// here; presumably the first is an early return and the second runs
// |callback_| -- confirm against the full file.
49 void DidCommitProvisionalLoadForFrame(
50 content::RenderFrameHost
* render_frame_host
,
52 ui::PageTransition transition_type
) override
{
53 if (wait_for_document_loaded_
)
55 if (!render_frame_host
->GetParent())
// content::WebContentsObserver override for the document-loaded event. Only
// acts when |wait_for_document_loaded_| is true and |render_frame_host| has no
// parent frame.
// NOTE(review): the guarded statement is not visible here; presumably it runs
// |callback_| -- confirm against the full file.
59 void DocumentLoadedInFrame(
60 content::RenderFrameHost
* render_frame_host
) override
{
61 if (wait_for_document_loaded_
) {
62 if (!render_frame_host
->GetParent())
// Closure supplied by the creator of this helper.
68 base::Closure callback_
;
// True when the helper waits for DocumentLoadedInFrame rather than
// DidCommitProvisionalLoadForFrame.
69 bool wait_for_document_loaded_
;
74 namespace dom_distiller
{
// Paths (relative to the embedded test server root) of the articles used by
// the distillation tests below. Declared as character arrays rather than
// non-const pointers so the names cannot be reseated at runtime; both still
// convert implicitly to std::string wherever a path is expected.
const char kSimpleArticlePath[] = "/simple_article.html";
const char kVideoArticlePath[] = "/video_article.html";
// Browser-test fixture for DistillerPageWebContents. It registers the
// components resource pack, configures the embedded test server, and provides
// helpers that start a distillation and capture its result in
// |distiller_result_|.
79 class DistillerPageWebContentsTest
: public ContentBrowserTest
{
81 // ContentBrowserTest:
// Adds the components resources before delegating to the base class set-up.
82 void SetUpOnMainThread() override
{
83 AddComponentsResources();
85 ContentBrowserTest::SetUpOnMainThread();
// Starts distilling the page served at |url| (a path resolved against the
// embedded test server) using default DomDistillerOptions.
// |quit_closure| is saved in |quit_closure_|.
// |distiller_page_| is dereferenced here, so the caller must have pointed it
// at a live DistillerPageWebContents first.
// OnPageDistillationFinished is bound as the completion callback.
88 void DistillPage(const base::Closure
& quit_closure
, const std::string
& url
) {
89 quit_closure_
= quit_closure
;
90 distiller_page_
->DistillPage(
91 embedded_test_server()->GetURL(url
),
92 dom_distiller::proto::DomDistillerOptions(),
93 base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished
,
// Completion callback for DistillPage(): takes ownership of
// |distiller_result| and stores it in |distiller_result_|.
// NOTE(review): presumably |quit_closure_| is also run here; that statement
// is not visible in this view.
97 void OnPageDistillationFinished(
98 scoped_ptr
<proto::DomDistillerResult
> distiller_result
,
99 bool distillation_successful
) {
100 distiller_result_
= distiller_result
.Pass();
// JavaScript completion callback: stores a deep copy of |value| in
// |js_result_|. |value| is dereferenced without a null check.
// NOTE(review): presumably |callback| is run afterwards; that statement is
// not visible in this view.
104 void OnJsExecutionDone(base::Closure callback
, const base::Value
* value
) {
105 js_result_
.reset(value
->DeepCopy());
// Adds a data pack to the shared ui::ResourceBundle with
// ui::SCALE_FACTOR_NONE. The pack directory is looked up under
// base::DIR_ANDROID_APP_DATA plus "paks" when OS_ANDROID is defined; a
// base::DIR_MODULE lookup is also present.
// NOTE(review): the #else/#endif lines and the assignment to |pak_file| are
// not visible in this view; presumably |pak_file| is |pak_dir| joined with
// "components_tests_resources.pak".
110 void AddComponentsResources() {
111 base::FilePath pak_file
;
112 base::FilePath pak_dir
;
113 #if defined(OS_ANDROID)
114 CHECK(PathService::Get(base::DIR_ANDROID_APP_DATA
, &pak_dir
));
115 pak_dir
= pak_dir
.Append(FILE_PATH_LITERAL("paks"));
117 PathService::Get(base::DIR_MODULE
, &pak_dir
);
120 pak_dir
.Append(FILE_PATH_LITERAL("components_tests_resources.pak"));
121 ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
122 pak_file
, ui::SCALE_FACTOR_NONE
);
// Configures the embedded test server to serve the dom_distiller test data
// and the distiller JavaScript directory, both resolved against
// base::DIR_SOURCE_ROOT, then asserts that the server started.
// NOTE(review): the declaration of |path| is not visible in this view.
125 void SetUpTestServer() {
127 PathService::Get(base::DIR_SOURCE_ROOT
, &path
);
128 embedded_test_server()->ServeFilesFromDirectory(
129 path
.AppendASCII("components/test/data/dom_distiller"));
130 embedded_test_server()->ServeFilesFromDirectory(
131 path
.AppendASCII("components/dom_distiller/core/javascript"));
132 ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
// Shared body of the UsingCurrentWebContents* tests; defined out of line
// further down in this file.
136 void RunUseCurrentWebContentsTest(const std::string
& url
,
137 bool expect_new_web_contents
,
138 bool setup_main_frame_observer
,
139 bool wait_for_document_loaded
);
// Non-owning pointer to the distiller page under test. The tests point it at
// a stack-allocated object, so it is only valid inside that test body.
141 DistillerPageWebContents
* distiller_page_
;
// Closure saved by DistillPage().
142 base::Closure quit_closure_
;
// Result of the most recent distillation, set by OnPageDistillationFinished().
143 scoped_ptr
<proto::DomDistillerResult
> distiller_result_
;
// Copy of the value passed to OnJsExecutionDone().
144 scoped_ptr
<base::Value
> js_result_
;
147 // Use this class to be able to leak the WebContents, which is needed for when
148 // the current WebContents is used for distillation.
// It also records whether CreateNewWebContents() was called, so a test can
// compare that with what it expected.
149 class TestDistillerPageWebContents
: public DistillerPageWebContents
{
// Forwards |browser_context|, |render_view_size| and
// |optional_web_contents_handle| to DistillerPageWebContents.
// |expect_new_web_contents| states whether CreateNewWebContents() is expected
// to be called during the test.
151 TestDistillerPageWebContents(
152 content::BrowserContext
* browser_context
,
153 const gfx::Size
& render_view_size
,
154 scoped_ptr
<SourcePageHandleWebContents
> optional_web_contents_handle
,
155 bool expect_new_web_contents
)
156 : DistillerPageWebContents(browser_context
, render_view_size
,
157 optional_web_contents_handle
.Pass()),
158 expect_new_web_contents_(expect_new_web_contents
),
159 new_web_contents_created_(false) {}
// Asserts that a new WebContents was expected, records that one was
// requested, releases the currently held WebContents without deleting it, and
// then delegates to the base class. The GetLastCommittedURL() call discards
// its result; it only reads through the released pointer.
161 void CreateNewWebContents(const GURL
& url
) override
{
162 ASSERT_EQ(true, expect_new_web_contents_
);
163 new_web_contents_created_
= true;
164 // DistillerPageWebContents::CreateNewWebContents resets the scoped_ptr to
165 // the WebContents, so intentionally leak WebContents here, since it is
166 // owned by the shell.
167 content::WebContents
* web_contents
= web_contents_
.release();
168 web_contents
->GetLastCommittedURL();
169 DistillerPageWebContents::CreateNewWebContents(url
);
// When no new WebContents was expected, |web_contents_| is released here so
// that it is not deleted along with this object.
172 ~TestDistillerPageWebContents() override
{
173 if (!expect_new_web_contents_
) {
174 // Intentionally leaking WebContents, since it is owned by the shell.
175 content::WebContents
* web_contents
= web_contents_
.release();
176 web_contents
->GetLastCommittedURL();
// Returns true once CreateNewWebContents() has been called.
180 bool new_web_contents_created() { return new_web_contents_created_
; }
// Whether CreateNewWebContents() is expected to be called.
183 bool expect_new_web_contents_
;
// Set to true by CreateNewWebContents().
184 bool new_web_contents_created_
;
// Distills kSimpleArticlePath with a DistillerPageWebContents that is given no
// source page handle, then checks the extracted title, that the article text
// is present, that the "questionable content" text is absent, and that no
// next/previous page links are reported.
// NOTE(review): the statement that runs |run_loop| is not visible in this
// view.
187 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, BasicDistillationWorks
) {
188 DistillerPageWebContents
distiller_page(
189 shell()->web_contents()->GetBrowserContext(),
190 shell()->web_contents()->GetContainerBounds().size(),
191 scoped_ptr
<SourcePageHandleWebContents
>());
192 distiller_page_
= &distiller_page
;
194 base::RunLoop run_loop
;
195 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
198 EXPECT_EQ("Test Page Title", distiller_result_
->title());
199 EXPECT_THAT(distiller_result_
->distilled_content().html(),
200 HasSubstr("Lorem ipsum"));
201 EXPECT_THAT(distiller_result_
->distilled_content().html(),
202 Not(HasSubstr("questionable content")));
203 EXPECT_EQ("", distiller_result_
->pagination_info().next_page());
204 EXPECT_EQ("", distiller_result_
->pagination_info().prev_page());
// Distills kSimpleArticlePath and checks the href attributes in the distilled
// HTML: the relative link must appear as an absolute http://127.0.0.1:<port>
// URL, and the already-absolute link must still be present.
// NOTE(review): the statement that runs |run_loop| is not visible in this
// view.
207 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeLinks
) {
208 DistillerPageWebContents
distiller_page(
209 shell()->web_contents()->GetBrowserContext(),
210 shell()->web_contents()->GetContainerBounds().size(),
211 scoped_ptr
<SourcePageHandleWebContents
>());
212 distiller_page_
= &distiller_page
;
214 base::RunLoop run_loop
;
215 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
218 // A relative link should've been updated.
219 EXPECT_THAT(distiller_result_
->distilled_content().html(),
220 ContainsRegex("href=\"http://127.0.0.1:.*/relativelink.html\""));
221 EXPECT_THAT(distiller_result_
->distilled_content().html(),
222 HasSubstr("href=\"http://www.google.com/absolutelink.html\""));
// Distills kSimpleArticlePath and checks the image src attributes in the
// distilled HTML: the relative image must appear as an absolute
// http://127.0.0.1:<port> URL, and the already-absolute image must still be
// present.
// NOTE(review): the statement that runs |run_loop| is not visible in this
// view.
225 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeImages
) {
226 DistillerPageWebContents
distiller_page(
227 shell()->web_contents()->GetBrowserContext(),
228 shell()->web_contents()->GetContainerBounds().size(),
229 scoped_ptr
<SourcePageHandleWebContents
>());
230 distiller_page_
= &distiller_page
;
232 base::RunLoop run_loop
;
233 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
236 // A relative image source should've been updated.
237 EXPECT_THAT(distiller_result_
->distilled_content().html(),
238 ContainsRegex("src=\"http://127.0.0.1:.*/relativeimage.png\""));
239 EXPECT_THAT(distiller_result_
->distilled_content().html(),
240 HasSubstr("src=\"http://www.google.com/absoluteimage.png\""));
// Distills kVideoArticlePath and checks the src attributes of the video and
// track elements in the distilled HTML: relative sources must appear as
// absolute http://127.0.0.1:<port> URLs, and already-absolute sources must
// still be present.
// NOTE(review): the statement that runs |run_loop| and the opening of the
// second expectation are not visible in this view.
244 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeVideos
) {
245 DistillerPageWebContents
distiller_page(
246 shell()->web_contents()->GetBrowserContext(),
247 shell()->web_contents()->GetContainerBounds().size(),
248 scoped_ptr
<SourcePageHandleWebContents
>());
249 distiller_page_
= &distiller_page
;
251 base::RunLoop run_loop
;
252 DistillPage(run_loop
.QuitClosure(), kVideoArticlePath
);
255 // A relative source/track should've been updated.
256 EXPECT_THAT(distiller_result_
->distilled_content().html(),
257 ContainsRegex("src=\"http://127.0.0.1:.*/relative_video.mp4\""));
259 distiller_result_
->distilled_content().html(),
260 ContainsRegex("src=\"http://127.0.0.1:.*/relative_track_en.vtt\""));
261 EXPECT_THAT(distiller_result_
->distilled_content().html(),
262 HasSubstr("src=\"http://www.google.com/absolute_video.ogg\""));
263 EXPECT_THAT(distiller_result_
->distilled_content().html(),
264 HasSubstr("src=\"http://www.google.com/absolute_track_fr.vtt\""));
// Distills two pages with the same distiller page object and checks that the
// article text is extracted from /visible_style.html but not from
// /invisible_style.html.
// NOTE(review): |run_loop| is declared twice below; the scope braces that
// separate the two distillations and the statements that run each loop are not
// visible in this view.
267 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, VisibilityDetection
) {
268 DistillerPageWebContents
distiller_page(
269 shell()->web_contents()->GetBrowserContext(),
270 shell()->web_contents()->GetContainerBounds().size(),
271 scoped_ptr
<SourcePageHandleWebContents
>());
272 distiller_page_
= &distiller_page
;
274 // visible_style.html and invisible_style.html only differ by the visibility
275 // internal stylesheet.
278 base::RunLoop run_loop
;
279 DistillPage(run_loop
.QuitClosure(), "/visible_style.html");
281 EXPECT_THAT(distiller_result_
->distilled_content().html(),
282 HasSubstr("Lorem ipsum"));
286 base::RunLoop run_loop
;
287 DistillPage(run_loop
.QuitClosure(), "/invisible_style.html");
289 EXPECT_THAT(distiller_result_
->distilled_content().html(),
290 Not(HasSubstr("Lorem ipsum")));
294 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
295 UsingCurrentWebContentsWrongUrl
) {
296 std::string
url("/bogus");
297 bool expect_new_web_contents
= true;
298 bool setup_main_frame_observer
= true;
299 bool wait_for_document_loaded
= true;
300 RunUseCurrentWebContentsTest(url
,
301 expect_new_web_contents
,
302 setup_main_frame_observer
,
303 wait_for_document_loaded
);
306 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
307 UsingCurrentWebContentsNoMainFrameObserver
) {
308 std::string
url(kSimpleArticlePath
);
309 bool expect_new_web_contents
= true;
310 bool setup_main_frame_observer
= false;
311 bool wait_for_document_loaded
= true;
312 RunUseCurrentWebContentsTest(url
,
313 expect_new_web_contents
,
314 setup_main_frame_observer
,
315 wait_for_document_loaded
);
318 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
319 UsingCurrentWebContentsNotFinishedLoadingYet
) {
320 std::string
url(kSimpleArticlePath
);
321 bool expect_new_web_contents
= false;
322 bool setup_main_frame_observer
= true;
323 bool wait_for_document_loaded
= false;
324 RunUseCurrentWebContentsTest(url
,
325 expect_new_web_contents
,
326 setup_main_frame_observer
,
327 wait_for_document_loaded
);
330 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
331 UsingCurrentWebContentsReadyForDistillation
) {
332 std::string
url(kSimpleArticlePath
);
333 bool expect_new_web_contents
= false;
334 bool setup_main_frame_observer
= true;
335 bool wait_for_document_loaded
= true;
336 RunUseCurrentWebContentsTest(url
,
337 expect_new_web_contents
,
338 setup_main_frame_observer
,
339 wait_for_document_loaded
);
// Shared body of the UsingCurrentWebContents* tests.
//
// Navigates the shell's current WebContents to |url|, waits for the load event
// selected by |wait_for_document_loaded|, then hands that WebContents to a
// TestDistillerPageWebContents as its source page and distills
// kSimpleArticlePath. Note that the distilled page is always
// kSimpleArticlePath, not |url|.
//
// |url|: path (resolved against the embedded test server) loaded into the
//     current WebContents before distillation starts.
// |expect_new_web_contents|: whether the distiller is expected to create a new
//     WebContents rather than reuse the current one; compared against
//     new_web_contents_created() at the end.
// |setup_main_frame_observer|: whether a WebContentsMainFrameObserver is
//     attached to the current WebContents before loading.
// |wait_for_document_loaded|: forwarded to WebContentsMainFrameHelper.
//
// NOTE(review): some LoadURL arguments, the closing brace of the observer
// set-up, and the statement that runs |run_loop| are not visible in this view.
342 void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest(
343 const std::string
& url
,
344 bool expect_new_web_contents
,
345 bool setup_main_frame_observer
,
346 bool wait_for_document_loaded
) {
347 content::WebContents
* current_web_contents
= shell()->web_contents();
348 if (setup_main_frame_observer
) {
349 dom_distiller::WebContentsMainFrameObserver::CreateForWebContents(
350 current_web_contents
);
// Load |url| and block until the helper reports the awaited load event.
352 base::RunLoop url_loaded_runner
;
353 WebContentsMainFrameHelper
main_frame_loaded(current_web_contents
,
354 url_loaded_runner
.QuitClosure(),
355 wait_for_document_loaded
);
356 current_web_contents
->GetController().LoadURL(
357 embedded_test_server()->GetURL(url
),
359 ui::PAGE_TRANSITION_TYPED
,
361 url_loaded_runner
.Run();
// Wrap the shell's WebContents in a scoped_ptr so it can be passed as the
// source page handle. TestDistillerPageWebContents releases it again instead
// of deleting it.
363 scoped_ptr
<content::WebContents
> old_web_contents_sptr(current_web_contents
);
364 scoped_ptr
<SourcePageHandleWebContents
> source_page_handle(
365 new SourcePageHandleWebContents(old_web_contents_sptr
.Pass()));
367 TestDistillerPageWebContents
distiller_page(
368 shell()->web_contents()->GetBrowserContext(),
369 shell()->web_contents()->GetContainerBounds().size(),
370 source_page_handle
.Pass(),
371 expect_new_web_contents
);
372 distiller_page_
= &distiller_page
;
374 base::RunLoop run_loop
;
375 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
378 // Sanity check of distillation process.
379 EXPECT_EQ(expect_new_web_contents
, distiller_page
.new_web_contents_created());
380 EXPECT_EQ("Test Page Title", distiller_result_
->title());
// Distills /markup_article.html and checks the markup information reported in
// the result: page-level fields, the article sub-message, and the two image
// entries.
// NOTE(review): the statement that runs |run_loop| is not visible in this
// view.
383 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, MarkupInfo
) {
384 DistillerPageWebContents
distiller_page(
385 shell()->web_contents()->GetBrowserContext(),
386 shell()->web_contents()->GetContainerBounds().size(),
387 scoped_ptr
<SourcePageHandleWebContents
>());
388 distiller_page_
= &distiller_page
;
390 base::RunLoop run_loop
;
391 DistillPage(run_loop
.QuitClosure(), "/markup_article.html");
394 EXPECT_THAT(distiller_result_
->distilled_content().html(),
395 HasSubstr("Lorem ipsum"));
396 EXPECT_EQ("Marked-up Markup Test Page Title", distiller_result_
->title());
// Page-level markup fields. |markup_info| is a copy of the message.
398 const proto::MarkupInfo markup_info
= distiller_result_
->markup_info();
399 EXPECT_EQ("Marked-up Markup Test Page Title", markup_info
.title());
400 EXPECT_EQ("Article", markup_info
.type());
401 EXPECT_EQ("http://test/markup.html", markup_info
.url());
402 EXPECT_EQ("This page tests Markup Info.", markup_info
.description());
403 EXPECT_EQ("Whoever Published", markup_info
.publisher());
404 EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", markup_info
.copyright());
405 EXPECT_EQ("Whoever Authored", markup_info
.author());
// Article-specific markup fields.
407 const proto::MarkupArticle markup_article
= markup_info
.article();
408 EXPECT_EQ("Whatever Section", markup_article
.section());
409 EXPECT_EQ("July 23, 2014", markup_article
.published_time());
410 EXPECT_EQ("2014-07-23T23:59", markup_article
.modified_time());
411 EXPECT_EQ("", markup_article
.expiration_time());
// ASSERT (not EXPECT) so that the indexed access below is never out of range.
412 ASSERT_EQ(1, markup_article
.authors_size());
413 EXPECT_EQ("Whoever Authored", markup_article
.authors(0));
// Exactly two images are expected; ASSERT guards the indexed accesses below.
415 ASSERT_EQ(2, markup_info
.images_size());
417 const proto::MarkupImage markup_image1
= markup_info
.images(0);
418 EXPECT_EQ("http://test/markup1.jpeg", markup_image1
.url());
419 EXPECT_EQ("https://test/markup1.jpeg", markup_image1
.secure_url());
420 EXPECT_EQ("jpeg", markup_image1
.type());
421 EXPECT_EQ("", markup_image1
.caption());
422 EXPECT_EQ(600, markup_image1
.width());
423 EXPECT_EQ(400, markup_image1
.height());
425 const proto::MarkupImage markup_image2
= markup_info
.images(1);
426 EXPECT_EQ("http://test/markup2.gif", markup_image2
.url());
427 EXPECT_EQ("https://test/markup2.gif", markup_image2
.secure_url());
428 EXPECT_EQ("gif", markup_image2
.type());
429 EXPECT_EQ("", markup_image2
.caption());
430 EXPECT_EQ(1000, markup_image2
.width());
431 EXPECT_EQ(600, markup_image2
.height());
434 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, TestTitleNeverEmpty
) {
435 const std::string some_title
= "some title";
436 const std::string no_title
=
437 l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_TITLE
);
439 { // Test empty title for article.
440 scoped_ptr
<DistilledArticleProto
> article_proto(
441 new DistilledArticleProto());
442 article_proto
->set_title("");
443 (*(article_proto
->add_pages())).set_html("");
444 std::string html
= viewer::GetUnsafeArticleTemplateHtml(
445 &article_proto
.get()->pages(0), DistilledPagePrefs::LIGHT
,
446 DistilledPagePrefs::SERIF
);
447 EXPECT_THAT(html
, HasSubstr(no_title
));
448 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
451 { // Test empty title for page.
452 scoped_ptr
<DistilledPageProto
> page_proto(new DistilledPageProto());
453 page_proto
->set_title("");
454 page_proto
->set_html("");
455 std::string html
= viewer::GetUnsafeArticleTemplateHtml(
456 page_proto
.get(), DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
457 EXPECT_THAT(html
, HasSubstr(no_title
));
458 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
461 { // Test missing title for page.
462 scoped_ptr
<DistilledPageProto
> page_proto(new DistilledPageProto());
463 std::string html
= viewer::GetUnsafeArticleTemplateHtml(
464 page_proto
.get(), DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
465 EXPECT_THAT(html
, HasSubstr(no_title
));
466 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
// Loads /pinch_tester.html in the shell's WebContents, runs the JavaScript
// entry point pinchtest.run() in the main frame, and expects the returned
// dictionary to contain a boolean "success" entry that is true.
// NOTE(review): the test-name line, several call arguments, the statement that
// runs |run_loop|, and the declaration of |success| are not visible in this
// view.
470 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
472 // Load the test file in content shell and wait until it has fully loaded.
473 content::WebContents
* web_contents
= shell()->web_contents();
474 dom_distiller::WebContentsMainFrameObserver::CreateForWebContents(
476 base::RunLoop url_loaded_runner
;
477 WebContentsMainFrameHelper
main_frame_loaded(web_contents
,
478 url_loaded_runner
.QuitClosure(),
480 web_contents
->GetController().LoadURL(
481 embedded_test_server()->GetURL("/pinch_tester.html"),
483 ui::PAGE_TRANSITION_TYPED
,
485 url_loaded_runner
.Run();
487 // Execute the JS to run the tests, and wait until it has finished.
// OnJsExecutionDone stores a copy of the script's return value in
// |js_result_|.
488 base::RunLoop run_loop
;
489 web_contents
->GetMainFrame()->ExecuteJavaScript(
490 base::UTF8ToUTF16("(function() {return pinchtest.run();})();"),
491 base::Bind(&DistillerPageWebContentsTest::OnJsExecutionDone
,
492 base::Unretained(this), run_loop
.QuitClosure()));
495 // Convert to dictionary and parse the results.
// |dict| is only assigned by GetAsDictionary(); the ASSERTs stop the test
// before it is read if no result was stored or the result is not a dictionary.
496 const base::DictionaryValue
* dict
;
497 ASSERT_TRUE(js_result_
);
498 ASSERT_TRUE(js_result_
->GetAsDictionary(&dict
));
500 ASSERT_TRUE(dict
->HasKey("success"));
502 ASSERT_TRUE(dict
->GetBoolean("success", &success
));
503 EXPECT_TRUE(success
);
506 } // namespace dom_distiller