1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/memory/weak_ptr.h"
6 #include "base/path_service.h"
7 #include "base/run_loop.h"
8 #include "base/values.h"
9 #include "components/dom_distiller/content/distiller_page_web_contents.h"
10 #include "components/dom_distiller/content/web_contents_main_frame_observer.h"
11 #include "components/dom_distiller/core/distiller_page.h"
12 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
13 #include "components/dom_distiller/core/proto/distilled_page.pb.h"
14 #include "components/dom_distiller/core/viewer.h"
15 #include "content/public/browser/browser_context.h"
16 #include "content/public/browser/navigation_controller.h"
17 #include "content/public/browser/render_frame_host.h"
18 #include "content/public/browser/web_contents_observer.h"
19 #include "content/public/test/content_browser_test.h"
20 #include "content/shell/browser/shell.h"
21 #include "grit/components_strings.h"
22 #include "net/test/embedded_test_server/embedded_test_server.h"
23 #include "testing/gmock/include/gmock/gmock.h"
24 #include "third_party/dom_distiller_js/dom_distiller.pb.h"
25 #include "ui/base/l10n/l10n_util.h"
26 #include "ui/base/resource/resource_bundle.h"
28 using content::ContentBrowserTest
;
29 using testing::ContainsRegex
;
30 using testing::HasSubstr
;
33 namespace dom_distiller
{
// Paths of the test articles, as passed to embedded_test_server()->GetURL()
// by the tests in this file. Declared as const arrays rather than mutable
// `const char*` globals so the names cannot be re-pointed at runtime; they
// still convert implicitly to const char* / std::string at every call site.
const char kSimpleArticlePath[] = "/simple_article.html";
const char kVideoArticlePath[] = "/video_article.html";
// Browser-test fixture for DistillerPageWebContents. Each test points
// |distiller_page_| at its own page object and calls DistillPage(); the
// distillation callback stores the result in |distiller_result_|.
// NOTE(review): parts of this class (e.g. access specifiers and closing
// braces) are not visible in this listing.
38 class DistillerPageWebContentsTest
: public ContentBrowserTest
{
40 // ContentBrowserTest:
// Registers the components test resources before running the base-class
// setup.
41 void SetUpOnMainThread() override
{
42 AddComponentsResources();
44 ContentBrowserTest::SetUpOnMainThread();
// Remembers |quit_closure| in |quit_closure_| and asks |distiller_page_| to
// distill the embedded-test-server URL for |url| using default-constructed
// DomDistillerOptions. OnPageDistillationFinished is bound as the callback.
// NOTE(review): the remaining base::Bind arguments are not visible here.
47 void DistillPage(const base::Closure
& quit_closure
, const std::string
& url
) {
48 quit_closure_
= quit_closure
;
49 distiller_page_
->DistillPage(
50 embedded_test_server()->GetURL(url
),
51 dom_distiller::proto::DomDistillerOptions(),
52 base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished
,
// Distillation callback: takes ownership of |distiller_result| and keeps it in
// |distiller_result_| for the test body to inspect. |distillation_successful|
// is not read in the visible lines.
// NOTE(review): presumably also runs |quit_closure_| - not visible here;
// confirm.
56 void OnPageDistillationFinished(
57 scoped_ptr
<proto::DomDistillerResult
> distiller_result
,
58 bool distillation_successful
) {
59 distiller_result_
= distiller_result
.Pass();
// Adds a data pack to the shared ResourceBundle with SCALE_FACTOR_NONE. The
// module directory (base::DIR_MODULE) is looked up into |pak_dir| and
// "components_tests_resources.pak" is appended to it.
// NOTE(review): the assignment receiving the Append() result is not visible in
// this listing; presumably it sets |pak_file| - confirm.
64 void AddComponentsResources() {
65 base::FilePath pak_file
;
66 base::FilePath pak_dir
;
67 PathService::Get(base::DIR_MODULE
, &pak_dir
);
69 pak_dir
.Append(FILE_PATH_LITERAL("components_tests_resources.pak"));
70 ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
71 pak_file
, ui::SCALE_FACTOR_NONE
);
// Serves components/test/data/dom_distiller (resolved against
// base::DIR_SOURCE_ROOT) from the embedded test server and asserts that the
// server initializes.
74 void SetUpTestServer() {
76 PathService::Get(base::DIR_SOURCE_ROOT
, &path
);
77 path
= path
.AppendASCII("components/test/data/dom_distiller");
78 embedded_test_server()->ServeFilesFromDirectory(path
);
79 ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
// Shared driver for the UsingCurrentWebContents* tests; defined out of line
// later in this file.
83 void RunUseCurrentWebContentsTest(const std::string
& url
,
84 bool expect_new_web_contents
,
85 bool setup_main_frame_observer
,
86 bool wait_for_document_loaded
);
// Page under test. Tests assign the address of a stack-allocated page here, so
// this pointer is non-owning.
88 DistillerPageWebContents
* distiller_page_
;
// Closure handed to DistillPage() by the running test.
89 base::Closure quit_closure_
;
// Result of the most recent distillation, set by OnPageDistillationFinished.
90 scoped_ptr
<proto::DomDistillerResult
> distiller_result_
;
93 // Use this class to be able to leak the WebContents, which is needed for when
94 // the current WebContents is used for distillation.
// It also records whether CreateNewWebContents() was called, so tests can
// compare that against what they expected.
95 class TestDistillerPageWebContents
: public DistillerPageWebContents
{
// Forwards |browser_context|, |render_view_size| and the (optional) source
// page handle to the base class, remembers |expect_new_web_contents|, and
// starts with |new_web_contents_created_| set to false.
97 TestDistillerPageWebContents(
98 content::BrowserContext
* browser_context
,
99 const gfx::Size
& render_view_size
,
100 scoped_ptr
<SourcePageHandleWebContents
> optional_web_contents_handle
,
101 bool expect_new_web_contents
)
102 : DistillerPageWebContents(browser_context
, render_view_size
,
103 optional_web_contents_handle
.Pass()),
104 expect_new_web_contents_(expect_new_web_contents
),
105 new_web_contents_created_(false) {}
// Asserts that a new WebContents was expected, records that one was created,
// releases the currently held WebContents, and then delegates to the base
// implementation.
107 void CreateNewWebContents(const GURL
& url
) override
{
108 ASSERT_EQ(true, expect_new_web_contents_
);
109 new_web_contents_created_
= true;
110 // DistillerPageWebContents::CreateNewWebContents resets the scoped_ptr to
111 // the WebContents, so intentionally leak WebContents here, since it is
112 // owned by the shell.
113 content::WebContents
* web_contents
= web_contents_
.release();
// The return value is discarded; presumably this call only exists so that
// |web_contents| counts as used - confirm.
114 web_contents
->GetLastCommittedURL();
115 DistillerPageWebContents::CreateNewWebContents(url
);
// When no new WebContents was expected, releases |web_contents_| so this
// object does not delete it.
118 ~TestDistillerPageWebContents() override
{
119 if (!expect_new_web_contents_
) {
120 // Intentionally leaking WebContents, since it is owned by the shell.
121 content::WebContents
* web_contents
= web_contents_
.release();
// Return value discarded, as in CreateNewWebContents().
122 web_contents
->GetLastCommittedURL();
// True once CreateNewWebContents() has been called on this object.
126 bool new_web_contents_created() { return new_web_contents_created_
; }
// Whether the test expects CreateNewWebContents() to be invoked.
129 bool expect_new_web_contents_
;
// Set to true by CreateNewWebContents().
130 bool new_web_contents_created_
;
133 // Helper class to know how far in the loading process the current WebContents
134 // has come. It will call the callback either after
135 // DidCommitProvisionalLoadForFrame or DocumentLoadedInFrame is called for the
136 // main frame, based on the value of |wait_for_document_loaded|.
137 class WebContentsMainFrameHelper
: public content::WebContentsObserver
{
// Starts observing |web_contents| and remembers |wait_for_document_loaded|.
// NOTE(review): the initializer that stores |callback| is not visible in this
// listing; presumably it initializes |callback_| - confirm.
139 WebContentsMainFrameHelper(content::WebContents
* web_contents
,
140 const base::Closure
& callback
,
141 bool wait_for_document_loaded
)
142 : WebContentsObserver(web_contents
),
144 wait_for_document_loaded_(wait_for_document_loaded
) {}
// Commit notification. The visible lines test |wait_for_document_loaded_| and
// then whether |render_frame_host| has no parent (i.e. is the main frame).
// NOTE(review): the statements guarded by these conditions are not visible
// here; per the class comment the callback runs for the main frame when not
// waiting for document load - confirm.
146 void DidCommitProvisionalLoadForFrame(
147 content::RenderFrameHost
* render_frame_host
,
149 ui::PageTransition transition_type
) override
{
150 if (wait_for_document_loaded_
)
152 if (!render_frame_host
->GetParent())
// Document-loaded notification. Only acts when |wait_for_document_loaded_| is
// set and |render_frame_host| has no parent.
// NOTE(review): the guarded statement is not visible here; presumably it runs
// |callback_| - confirm.
156 void DocumentLoadedInFrame(
157 content::RenderFrameHost
* render_frame_host
) override
{
158 if (wait_for_document_loaded_
) {
159 if (!render_frame_host
->GetParent())
// Closure to invoke once the chosen loading milestone is reached.
165 base::Closure callback_
;
// Selects which of the two notifications above is the milestone.
166 bool wait_for_document_loaded_
;
// Distills kSimpleArticlePath with a page that has no source-page handle
// (empty scoped_ptr) and checks the title, that the distilled HTML contains
// "Lorem ipsum" but not "questionable content", and that both pagination links
// are empty.
169 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, BasicDistillationWorks
) {
170 DistillerPageWebContents
distiller_page(
171 shell()->web_contents()->GetBrowserContext(),
172 shell()->web_contents()->GetContainerBounds().size(),
173 scoped_ptr
<SourcePageHandleWebContents
>());
174 distiller_page_
= &distiller_page
;
176 base::RunLoop run_loop
;
177 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
180 EXPECT_EQ("Test Page Title", distiller_result_
->title());
181 EXPECT_THAT(distiller_result_
->distilled_content().html(),
182 HasSubstr("Lorem ipsum"));
183 EXPECT_THAT(distiller_result_
->distilled_content().html(),
184 Not(HasSubstr("questionable content")));
185 EXPECT_EQ("", distiller_result_
->pagination_info().next_page());
186 EXPECT_EQ("", distiller_result_
->pagination_info().prev_page());
// Distills kSimpleArticlePath and checks the href attributes in the output:
// the relative link must appear as an absolute http://127.0.0.1:<port>/ URL,
// and the already-absolute link must still be present.
189 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeLinks
) {
190 DistillerPageWebContents
distiller_page(
191 shell()->web_contents()->GetBrowserContext(),
192 shell()->web_contents()->GetContainerBounds().size(),
193 scoped_ptr
<SourcePageHandleWebContents
>());
194 distiller_page_
= &distiller_page
;
196 base::RunLoop run_loop
;
197 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
200 // A relative link should've been updated.
201 EXPECT_THAT(distiller_result_
->distilled_content().html(),
202 ContainsRegex("href=\"http://127.0.0.1:.*/relativelink.html\""));
203 EXPECT_THAT(distiller_result_
->distilled_content().html(),
204 HasSubstr("href=\"http://www.google.com/absolutelink.html\""));
// Distills kSimpleArticlePath and checks the image src attributes in the
// output: the relative image must appear as an absolute
// http://127.0.0.1:<port>/ URL, and the already-absolute image must still be
// present.
207 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeImages
) {
208 DistillerPageWebContents
distiller_page(
209 shell()->web_contents()->GetBrowserContext(),
210 shell()->web_contents()->GetContainerBounds().size(),
211 scoped_ptr
<SourcePageHandleWebContents
>());
212 distiller_page_
= &distiller_page
;
214 base::RunLoop run_loop
;
215 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
218 // A relative image source should've been updated.
219 EXPECT_THAT(distiller_result_
->distilled_content().html(),
220 ContainsRegex("src=\"http://127.0.0.1:.*/relativeimage.png\""));
221 EXPECT_THAT(distiller_result_
->distilled_content().html(),
222 HasSubstr("src=\"http://www.google.com/absoluteimage.png\""));
// Distills kVideoArticlePath and checks the src attributes of video sources
// and tracks: relative ones must appear as absolute http://127.0.0.1:<port>/
// URLs, and already-absolute ones must still be present.
226 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeVideos
) {
227 DistillerPageWebContents
distiller_page(
228 shell()->web_contents()->GetBrowserContext(),
229 shell()->web_contents()->GetContainerBounds().size(),
230 scoped_ptr
<SourcePageHandleWebContents
>());
231 distiller_page_
= &distiller_page
;
233 base::RunLoop run_loop
;
234 DistillPage(run_loop
.QuitClosure(), kVideoArticlePath
);
237 // A relative source/track should've been updated.
238 EXPECT_THAT(distiller_result_
->distilled_content().html(),
239 ContainsRegex("src=\"http://127.0.0.1:.*/relative_video.mp4\""));
// NOTE(review): the opening of the next expectation (for the relative track)
// is not visible in this listing.
241 distiller_result_
->distilled_content().html(),
242 ContainsRegex("src=\"http://127.0.0.1:.*/relative_track_en.vtt\""));
243 EXPECT_THAT(distiller_result_
->distilled_content().html(),
244 HasSubstr("src=\"http://www.google.com/absolute_video.ogg\""));
245 EXPECT_THAT(distiller_result_
->distilled_content().html(),
246 HasSubstr("src=\"http://www.google.com/absolute_track_fr.vtt\""));
// Distills two pages with the same DistillerPageWebContents and checks that
// "Lorem ipsum" is present in the output for /visible_style.html and absent
// for /invisible_style.html.
249 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, VisibilityDetection
) {
250 DistillerPageWebContents
distiller_page(
251 shell()->web_contents()->GetBrowserContext(),
252 shell()->web_contents()->GetContainerBounds().size(),
253 scoped_ptr
<SourcePageHandleWebContents
>());
254 distiller_page_
= &distiller_page
;
256 // visible_style.html and invisible_style.html only differ by the visibility
257 // internal stylesheet.
// First pass: content styled as visible must be kept.
260 base::RunLoop run_loop
;
261 DistillPage(run_loop
.QuitClosure(), "/visible_style.html");
263 EXPECT_THAT(distiller_result_
->distilled_content().html(),
264 HasSubstr("Lorem ipsum"));
// Second pass: content styled as invisible must be dropped.
268 base::RunLoop run_loop
;
269 DistillPage(run_loop
.QuitClosure(), "/invisible_style.html");
271 EXPECT_THAT(distiller_result_
->distilled_content().html(),
272 Not(HasSubstr("Lorem ipsum")));
// The current WebContents is navigated to "/bogus", which is not the URL that
// RunUseCurrentWebContentsTest later distills (kSimpleArticlePath), so a new
// WebContents is expected to be created.
276 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
277 UsingCurrentWebContentsWrongUrl
) {
278 std::string
url("/bogus");
279 bool expect_new_web_contents
= true;
280 bool setup_main_frame_observer
= true;
281 bool wait_for_document_loaded
= true;
282 RunUseCurrentWebContentsTest(url
,
283 expect_new_web_contents
,
284 setup_main_frame_observer
,
285 wait_for_document_loaded
);
// The current WebContents loads the article itself, but no
// WebContentsMainFrameObserver is attached to it
// (setup_main_frame_observer == false); a new WebContents is expected to be
// created.
288 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
289 UsingCurrentWebContentsNoMainFrameObserver
) {
290 std::string
url(kSimpleArticlePath
);
291 bool expect_new_web_contents
= true;
292 bool setup_main_frame_observer
= false;
293 bool wait_for_document_loaded
= true;
294 RunUseCurrentWebContentsTest(url
,
295 expect_new_web_contents
,
296 setup_main_frame_observer
,
297 wait_for_document_loaded
);
// The current WebContents loads the article with a main-frame observer
// attached, but the test does not wait for the document-loaded milestone
// (wait_for_document_loaded == false) before distilling. No new WebContents is
// expected.
300 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
301 UsingCurrentWebContentsNotFinishedLoadingYet
) {
302 std::string
url(kSimpleArticlePath
);
303 bool expect_new_web_contents
= false;
304 bool setup_main_frame_observer
= true;
305 bool wait_for_document_loaded
= false;
306 RunUseCurrentWebContentsTest(url
,
307 expect_new_web_contents
,
308 setup_main_frame_observer
,
309 wait_for_document_loaded
);
// The current WebContents loads the article with a main-frame observer
// attached, and the test waits for the document-loaded milestone before
// distilling. No new WebContents is expected.
312 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
313 UsingCurrentWebContentsReadyForDistillation
) {
314 std::string
url(kSimpleArticlePath
);
315 bool expect_new_web_contents
= false;
316 bool setup_main_frame_observer
= true;
317 bool wait_for_document_loaded
= true;
318 RunUseCurrentWebContentsTest(url
,
319 expect_new_web_contents
,
320 setup_main_frame_observer
,
321 wait_for_document_loaded
);
// Shared driver for the UsingCurrentWebContents* tests.
//   url: path loaded into the shell's current WebContents before distilling.
//   expect_new_web_contents: whether the distiller page is expected to create
//       its own WebContents instead of reusing the current one.
//   setup_main_frame_observer: whether a WebContentsMainFrameObserver is
//       attached to the current WebContents first.
//   wait_for_document_loaded: forwarded to WebContentsMainFrameHelper to pick
//       the loading milestone waited for before distillation starts.
// The page distilled afterwards is always kSimpleArticlePath, regardless of
// |url|.
324 void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest(
325 const std::string
& url
,
326 bool expect_new_web_contents
,
327 bool setup_main_frame_observer
,
328 bool wait_for_document_loaded
) {
329 content::WebContents
* current_web_contents
= shell()->web_contents();
330 if (setup_main_frame_observer
) {
331 dom_distiller::WebContentsMainFrameObserver::CreateForWebContents(
332 current_web_contents
);
// Load |url| and block until the helper reports the chosen milestone.
// NOTE(review): the trailing LoadURL arguments are not fully visible here.
334 base::RunLoop url_loaded_runner
;
335 WebContentsMainFrameHelper
main_frame_loaded(current_web_contents
,
336 url_loaded_runner
.QuitClosure(),
337 wait_for_document_loaded
);
338 current_web_contents
->GetController().LoadURL(
339 embedded_test_server()->GetURL(url
),
341 ui::PAGE_TRANSITION_TYPED
,
343 url_loaded_runner
.Run();
// Wrap the shell's WebContents in a source-page handle and give it to the
// distiller page. TestDistillerPageWebContents releases the pointer again
// rather than deleting it (see that class).
345 scoped_ptr
<content::WebContents
> old_web_contents_sptr(current_web_contents
);
346 scoped_ptr
<SourcePageHandleWebContents
> source_page_handle(
347 new SourcePageHandleWebContents(old_web_contents_sptr
.Pass()));
349 TestDistillerPageWebContents
distiller_page(
350 shell()->web_contents()->GetBrowserContext(),
351 shell()->web_contents()->GetContainerBounds().size(),
352 source_page_handle
.Pass(),
353 expect_new_web_contents
);
354 distiller_page_
= &distiller_page
;
356 base::RunLoop run_loop
;
357 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
360 // Sanity check of distillation process.
361 EXPECT_EQ(expect_new_web_contents
, distiller_page
.new_web_contents_created());
362 EXPECT_EQ("Test Page Title", distiller_result_
->title());
// Distills /markup_article.html and checks the markup_info section of the
// result: page-level fields, the nested article fields, and the two images.
365 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, MarkupInfo
) {
366 DistillerPageWebContents
distiller_page(
367 shell()->web_contents()->GetBrowserContext(),
368 shell()->web_contents()->GetContainerBounds().size(),
369 scoped_ptr
<SourcePageHandleWebContents
>());
370 distiller_page_
= &distiller_page
;
372 base::RunLoop run_loop
;
373 DistillPage(run_loop
.QuitClosure(), "/markup_article.html");
376 EXPECT_THAT(distiller_result_
->distilled_content().html(),
377 HasSubstr("Lorem ipsum"));
378 EXPECT_EQ("Marked-up Markup Test Page Title", distiller_result_
->title());
// Page-level markup fields.
380 const proto::MarkupInfo markup_info
= distiller_result_
->markup_info();
381 EXPECT_EQ("Marked-up Markup Test Page Title", markup_info
.title());
382 EXPECT_EQ("Article", markup_info
.type());
383 EXPECT_EQ("http://test/markup.html", markup_info
.url());
384 EXPECT_EQ("This page tests Markup Info.", markup_info
.description());
385 EXPECT_EQ("Whoever Published", markup_info
.publisher());
386 EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", markup_info
.copyright());
387 EXPECT_EQ("Whoever Authored", markup_info
.author());
// Article-specific markup fields.
389 const proto::MarkupArticle markup_article
= markup_info
.article();
390 EXPECT_EQ("Whatever Section", markup_article
.section());
391 EXPECT_EQ("July 23, 2014", markup_article
.published_time());
392 EXPECT_EQ("2014-07-23T23:59", markup_article
.modified_time());
393 EXPECT_EQ("", markup_article
.expiration_time());
// ASSERT (not EXPECT) so authors(0) below is only reached when it exists.
394 ASSERT_EQ(1, markup_article
.authors_size());
395 EXPECT_EQ("Whoever Authored", markup_article
.authors(0));
// ASSERT (not EXPECT) so images(0) and images(1) below are only reached when
// both exist.
397 ASSERT_EQ(2, markup_info
.images_size());
399 const proto::MarkupImage markup_image1
= markup_info
.images(0);
400 EXPECT_EQ("http://test/markup1.jpeg", markup_image1
.url());
401 EXPECT_EQ("https://test/markup1.jpeg", markup_image1
.secure_url());
402 EXPECT_EQ("jpeg", markup_image1
.type());
403 EXPECT_EQ("", markup_image1
.caption());
404 EXPECT_EQ(600, markup_image1
.width());
405 EXPECT_EQ(400, markup_image1
.height());
407 const proto::MarkupImage markup_image2
= markup_info
.images(1);
408 EXPECT_EQ("http://test/markup2.gif", markup_image2
.url());
409 EXPECT_EQ("https://test/markup2.gif", markup_image2
.secure_url());
410 EXPECT_EQ("gif", markup_image2
.type());
411 EXPECT_EQ("", markup_image2
.caption());
412 EXPECT_EQ(1000, markup_image2
.width());
413 EXPECT_EQ(600, markup_image2
.height());
// Checks the HTML produced by viewer::GetUnsafeArticleHtml (whole article) and
// viewer::GetUnsafePartialArticleHtml (single page) for combinations of
// present, empty and missing title/content. Where data is absent, the
// localized "no data" title/content strings are expected in its place.
// Per the expectations below, the two functions differ when only one field is
// set: the article variant falls back to both placeholders, while the page
// variant keeps whichever field is present.
416 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
417 TestTitleAndContentAreNeverEmpty
) {
418 const std::string some_title
= "some title";
419 const std::string some_content
= "some content";
// Localized placeholders used when a title or content is unavailable.
420 const std::string no_title
=
421 l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_TITLE
);
422 const std::string no_content
=
423 l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_CONTENT
);
425 { // Test non-empty title and content for article.
426 scoped_ptr
<DistilledArticleProto
> article_proto(
427 new DistilledArticleProto());
428 article_proto
->set_title(some_title
);
429 (*(article_proto
->add_pages())).set_html(some_content
);
430 std::string html
= viewer::GetUnsafeArticleHtml(article_proto
.get(),
431 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
432 EXPECT_THAT(html
, HasSubstr(some_title
));
433 EXPECT_THAT(html
, HasSubstr(some_content
));
434 EXPECT_THAT(html
, Not(HasSubstr(no_title
)));
435 EXPECT_THAT(html
, Not(HasSubstr(no_content
)));
438 { // Test empty title and content for article.
439 scoped_ptr
<DistilledArticleProto
> article_proto(
440 new DistilledArticleProto());
441 article_proto
->set_title("");
442 (*(article_proto
->add_pages())).set_html("");
443 std::string html
= viewer::GetUnsafeArticleHtml(article_proto
.get(),
444 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
445 EXPECT_THAT(html
, HasSubstr(no_title
));
446 EXPECT_THAT(html
, HasSubstr(no_content
));
447 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
448 EXPECT_THAT(html
, Not(HasSubstr(some_content
)));
451 { // Test missing title and non-empty content for article.
452 scoped_ptr
<DistilledArticleProto
> article_proto(
453 new DistilledArticleProto());
454 (*(article_proto
->add_pages())).set_html(some_content
);
455 std::string html
= viewer::GetUnsafeArticleHtml(article_proto
.get(),
456 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
// Note: the supplied content is expected to be replaced by the placeholder
// too, not only the missing title.
457 EXPECT_THAT(html
, HasSubstr(no_title
));
458 EXPECT_THAT(html
, HasSubstr(no_content
));
459 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
460 EXPECT_THAT(html
, Not(HasSubstr(some_content
)));
463 { // Test non-empty title and missing content for article.
464 scoped_ptr
<DistilledArticleProto
> article_proto(
465 new DistilledArticleProto());
466 article_proto
->set_title(some_title
);
467 std::string html
= viewer::GetUnsafeArticleHtml(article_proto
.get(),
468 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
// Note: the supplied title is expected to be replaced by the placeholder too,
// not only the missing content.
469 EXPECT_THAT(html
, HasSubstr(no_title
));
470 EXPECT_THAT(html
, HasSubstr(no_content
));
471 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
472 EXPECT_THAT(html
, Not(HasSubstr(some_content
)));
475 { // Test non-empty title and content for page.
476 scoped_ptr
<DistilledPageProto
> page_proto(new DistilledPageProto());
477 page_proto
->set_title(some_title
);
478 page_proto
->set_html(some_content
);
479 std::string html
= viewer::GetUnsafePartialArticleHtml(page_proto
.get(),
480 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
481 EXPECT_THAT(html
, HasSubstr(some_title
));
482 EXPECT_THAT(html
, HasSubstr(some_content
));
483 EXPECT_THAT(html
, Not(HasSubstr(no_title
)));
484 EXPECT_THAT(html
, Not(HasSubstr(no_content
)));
487 { // Test empty title and content for page.
488 scoped_ptr
<DistilledPageProto
> page_proto(new DistilledPageProto());
489 page_proto
->set_title("");
490 page_proto
->set_html("");
491 std::string html
= viewer::GetUnsafePartialArticleHtml(page_proto
.get(),
492 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
493 EXPECT_THAT(html
, HasSubstr(no_title
));
494 EXPECT_THAT(html
, HasSubstr(no_content
));
495 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
496 EXPECT_THAT(html
, Not(HasSubstr(some_content
)));
499 { // Test missing title and non-empty content for page.
500 scoped_ptr
<DistilledPageProto
> page_proto(new DistilledPageProto());
501 page_proto
->set_html(some_content
);
502 std::string html
= viewer::GetUnsafePartialArticleHtml(page_proto
.get(),
503 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
// Unlike the article case, the supplied content is kept here.
504 EXPECT_THAT(html
, HasSubstr(no_title
));
505 EXPECT_THAT(html
, HasSubstr(some_content
));
506 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
507 EXPECT_THAT(html
, Not(HasSubstr(no_content
)));
510 { // Test non-empty title and missing content for page.
511 scoped_ptr
<DistilledPageProto
> page_proto(new DistilledPageProto());
512 page_proto
->set_title(some_title
);
513 std::string html
= viewer::GetUnsafePartialArticleHtml(page_proto
.get(),
514 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
// Unlike the article case, the supplied title is kept here.
515 EXPECT_THAT(html
, HasSubstr(some_title
));
516 EXPECT_THAT(html
, HasSubstr(no_content
));
517 EXPECT_THAT(html
, Not(HasSubstr(no_title
)));
518 EXPECT_THAT(html
, Not(HasSubstr(some_content
)));
522 } // namespace dom_distiller