1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/memory/weak_ptr.h"
6 #include "base/path_service.h"
7 #include "base/run_loop.h"
8 #include "base/values.h"
9 #include "components/dom_distiller/content/distiller_page_web_contents.h"
10 #include "components/dom_distiller/content/web_contents_main_frame_observer.h"
11 #include "components/dom_distiller/core/distiller_page.h"
12 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
13 #include "components/dom_distiller/core/proto/distilled_page.pb.h"
14 #include "components/dom_distiller/core/viewer.h"
15 #include "content/public/browser/browser_context.h"
16 #include "content/public/browser/navigation_controller.h"
17 #include "content/public/browser/render_frame_host.h"
18 #include "content/public/browser/web_contents_observer.h"
19 #include "content/public/test/content_browser_test.h"
20 #include "content/shell/browser/shell.h"
21 #include "grit/components_strings.h"
22 #include "net/test/embedded_test_server/embedded_test_server.h"
23 #include "testing/gmock/include/gmock/gmock.h"
24 #include "third_party/dom_distiller_js/dom_distiller.pb.h"
25 #include "ui/base/l10n/l10n_util.h"
26 #include "ui/base/resource/resource_bundle.h"
// Names from the content test framework and gmock that the code below uses
// unqualified.
28 using content::ContentBrowserTest
;
29 using testing::ContainsRegex
;
30 using testing::HasSubstr
;
33 namespace dom_distiller
{
// Paths, relative to the embedded test server root, of the articles distilled
// by the tests below. Declared as arrays rather than pointers so that the
// constants cannot be reseated and have internal linkage.
const char kSimpleArticlePath[] = "/simple_article.html";
const char kVideoArticlePath[] = "/video_article.html";
// Browser-test fixture for DistillerPageWebContents. It keeps a pointer to the
// page under test, the closure handed to DistillPage(), and the result received
// by OnPageDistillationFinished() so that test bodies can inspect it.
38 class DistillerPageWebContentsTest
: public ContentBrowserTest
{
40 // ContentBrowserTest:
// Registers the components resource pack and then delegates to the base class.
41 virtual void SetUpOnMainThread() OVERRIDE
{
42 AddComponentsResources();
44 ContentBrowserTest::SetUpOnMainThread();
// Stores |quit_closure| in quit_closure_ and asks distiller_page_ to distill
// the embedded-test-server URL for |url| using default DomDistillerOptions.
// OnPageDistillationFinished() is bound as the completion callback.
47 void DistillPage(const base::Closure
& quit_closure
, const std::string
& url
) {
48 quit_closure_
= quit_closure
;
49 distiller_page_
->DistillPage(
50 embedded_test_server()->GetURL(url
),
51 dom_distiller::proto::DomDistillerOptions(),
52 base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished
,
// Completion callback for DistillPage(): takes ownership of
// |distiller_result| and stores it in distiller_result_.
56 void OnPageDistillationFinished(
57 scoped_ptr
<proto::DomDistillerResult
> distiller_result
,
58 bool distillation_successful
) {
59 distiller_result_
= distiller_result
.Pass();
// Adds components_resources.pak, looked up in the base::DIR_MODULE directory,
// to the shared ResourceBundle.
64 void AddComponentsResources() {
65 base::FilePath pak_file
;
66 base::FilePath pak_dir
;
67 PathService::Get(base::DIR_MODULE
, &pak_dir
);
68 pak_file
= pak_dir
.Append(FILE_PATH_LITERAL("components_resources.pak"));
69 ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
70 pak_file
, ui::SCALE_FACTOR_NONE
);
// Makes the embedded test server serve components/test/data/dom_distiller
// (resolved against base::DIR_SOURCE_ROOT) and asserts that it starts.
// NOTE(review): the declaration of |path| is not visible in this excerpt.
73 void SetUpTestServer() {
75 PathService::Get(base::DIR_SOURCE_ROOT
, &path
);
76 path
= path
.AppendASCII("components/test/data/dom_distiller");
77 embedded_test_server()->ServeFilesFromDirectory(path
);
78 ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
// Shared body of the UsingCurrentWebContents* tests; defined out of line
// after those tests.
82 void RunUseCurrentWebContentsTest(const std::string
& url
,
83 bool expect_new_web_contents
,
84 bool setup_main_frame_observer
,
85 bool wait_for_document_loaded
);
// Page under test. The tests in this file point it at a stack-allocated page,
// so the fixture does not own it.
87 DistillerPageWebContents
* distiller_page_
;
// Closure passed to the most recent DistillPage() call.
88 base::Closure quit_closure_
;
// Result handed to OnPageDistillationFinished().
89 scoped_ptr
<proto::DomDistillerResult
> distiller_result_
;
92 // Use this class to be able to leak the WebContents, which is needed for when
93 // the current WebContents is used for distillation.
94 class TestDistillerPageWebContents
: public DistillerPageWebContents
{
96 TestDistillerPageWebContents(
97 content::BrowserContext
* browser_context
,
98 const gfx::Size
& render_view_size
,
99 scoped_ptr
<SourcePageHandleWebContents
> optional_web_contents_handle
,
100 bool expect_new_web_contents
)
101 : DistillerPageWebContents(browser_context
, render_view_size
,
102 optional_web_contents_handle
.Pass()),
103 expect_new_web_contents_(expect_new_web_contents
),
104 new_web_contents_created_(false) {}
106 virtual void CreateNewWebContents(const GURL
& url
) OVERRIDE
{
107 ASSERT_EQ(true, expect_new_web_contents_
);
108 new_web_contents_created_
= true;
109 // DistillerPageWebContents::CreateNewWebContents resets the scoped_ptr to
110 // the WebContents, so intentionally leak WebContents here, since it is
111 // owned by the shell.
112 content::WebContents
* web_contents
= web_contents_
.release();
113 web_contents
->GetLastCommittedURL();
114 DistillerPageWebContents::CreateNewWebContents(url
);
117 virtual ~TestDistillerPageWebContents() {
118 if (!expect_new_web_contents_
) {
119 // Intentionally leaking WebContents, since it is owned by the shell.
120 content::WebContents
* web_contents
= web_contents_
.release();
121 web_contents
->GetLastCommittedURL();
125 bool new_web_contents_created() { return new_web_contents_created_
; }
128 bool expect_new_web_contents_
;
129 bool new_web_contents_created_
;
132 // Helper class to know how far in the loading process the current WebContents
133 // has come. It will call the callback either after
134 // DidCommitProvisionalLoadForFrame or DocumentLoadedInFrame is called for the
135 // main frame, based on the value of |wait_for_document_loaded|.
136 class WebContentsMainFrameHelper
: public content::WebContentsObserver
{
// Starts observing |web_contents| and remembers |wait_for_document_loaded|.
// NOTE(review): the initializer for callback_ is not visible in this excerpt.
138 WebContentsMainFrameHelper(content::WebContents
* web_contents
,
139 const base::Closure
& callback
,
140 bool wait_for_document_loaded
)
141 : WebContentsObserver(web_contents
),
143 wait_for_document_loaded_(wait_for_document_loaded
) {}
// Commit notification. Tests wait_for_document_loaded_ first and then whether
// |render_frame_host| has no parent frame; the statements guarded by those
// conditions are not visible in this excerpt.
145 virtual void DidCommitProvisionalLoadForFrame(
146 content::RenderFrameHost
* render_frame_host
,
148 ui::PageTransition transition_type
) OVERRIDE
{
149 if (wait_for_document_loaded_
)
151 if (!render_frame_host
->GetParent())
// Document-loaded notification. Only looks at the frame when
// wait_for_document_loaded_ is set, and then only at frames without a parent.
155 virtual void DocumentLoadedInFrame(
156 content::RenderFrameHost
* render_frame_host
) OVERRIDE
{
157 if (wait_for_document_loaded_
) {
158 if (!render_frame_host
->GetParent())
// Closure described by the class comment above.
164 base::Closure callback_
;
// Selects which of the two notifications above is acted upon.
165 bool wait_for_document_loaded_
;
// Distills the simple article with a DistillerPageWebContents that is given no
// source-page handle, then checks the title, that the article text is present,
// that the "questionable content" text is absent, and that no next/previous
// page is reported.
168 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, BasicDistillationWorks
) {
169 DistillerPageWebContents
distiller_page(
170 shell()->web_contents()->GetBrowserContext(),
171 shell()->web_contents()->GetContainerBounds().size(),
172 scoped_ptr
<SourcePageHandleWebContents
>());
173 distiller_page_
= &distiller_page
;
175 base::RunLoop run_loop
;
176 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
// NOTE(review): nothing visible here runs |run_loop| before distiller_result_
// is read below -- confirm the loop is spun.
179 EXPECT_EQ("Test Page Title", distiller_result_
->title());
180 EXPECT_THAT(distiller_result_
->distilled_content().html(),
181 HasSubstr("Lorem ipsum"));
182 EXPECT_THAT(distiller_result_
->distilled_content().html(),
183 Not(HasSubstr("questionable content")));
// Empty strings mean no pagination links were found.
184 EXPECT_EQ("", distiller_result_
->pagination_info().next_page());
185 EXPECT_EQ("", distiller_result_
->pagination_info().prev_page());
// Distills the simple article and checks the href attributes in the distilled
// HTML: the relative link must appear with the test server's host, and the
// absolute link must appear as-is.
188 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeLinks
) {
189 DistillerPageWebContents
distiller_page(
190 shell()->web_contents()->GetBrowserContext(),
191 shell()->web_contents()->GetContainerBounds().size(),
192 scoped_ptr
<SourcePageHandleWebContents
>());
193 distiller_page_
= &distiller_page
;
195 base::RunLoop run_loop
;
196 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
// NOTE(review): nothing visible here runs |run_loop| before distiller_result_
// is read below -- confirm the loop is spun.
199 // A relative link should've been updated.
// The port is matched with ".*", so any server port is accepted.
200 EXPECT_THAT(distiller_result_
->distilled_content().html(),
201 ContainsRegex("href=\"http://127.0.0.1:.*/relativelink.html\""));
202 EXPECT_THAT(distiller_result_
->distilled_content().html(),
203 HasSubstr("href=\"http://www.google.com/absolutelink.html\""));
// Distills the simple article and checks the src attributes of images in the
// distilled HTML: the relative image must appear with the test server's host,
// and the absolute image must appear as-is.
206 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeImages
) {
207 DistillerPageWebContents
distiller_page(
208 shell()->web_contents()->GetBrowserContext(),
209 shell()->web_contents()->GetContainerBounds().size(),
210 scoped_ptr
<SourcePageHandleWebContents
>());
211 distiller_page_
= &distiller_page
;
213 base::RunLoop run_loop
;
214 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
// NOTE(review): nothing visible here runs |run_loop| before distiller_result_
// is read below -- confirm the loop is spun.
217 // A relative image source should've been updated.
218 EXPECT_THAT(distiller_result_
->distilled_content().html(),
219 ContainsRegex("src=\"http://127.0.0.1:.*/relativeimage.png\""));
220 EXPECT_THAT(distiller_result_
->distilled_content().html(),
221 HasSubstr("src=\"http://www.google.com/absoluteimage.png\""));
// Distills the video article and checks the src attributes of the video and
// track elements in the distilled HTML: relative ones must appear with the
// test server's host, absolute ones must appear as-is.
225 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeVideos
) {
226 DistillerPageWebContents
distiller_page(
227 shell()->web_contents()->GetBrowserContext(),
228 shell()->web_contents()->GetContainerBounds().size(),
229 scoped_ptr
<SourcePageHandleWebContents
>());
230 distiller_page_
= &distiller_page
;
232 base::RunLoop run_loop
;
233 DistillPage(run_loop
.QuitClosure(), kVideoArticlePath
);
// NOTE(review): nothing visible here runs |run_loop| before distiller_result_
// is read below -- confirm the loop is spun.
236 // A relative source/track should've been updated.
237 EXPECT_THAT(distiller_result_
->distilled_content().html(),
238 ContainsRegex("src=\"http://127.0.0.1:.*/relative_video.mp4\""));
// Same check for the relative subtitle track.
240 distiller_result_
->distilled_content().html(),
241 ContainsRegex("src=\"http://127.0.0.1:.*/relative_track_en.vtt\""));
// Absolute sources are expected unchanged.
242 EXPECT_THAT(distiller_result_
->distilled_content().html(),
243 HasSubstr("src=\"http://www.google.com/absolute_video.ogg\""));
244 EXPECT_THAT(distiller_result_
->distilled_content().html(),
245 HasSubstr("src=\"http://www.google.com/absolute_track_fr.vtt\""));
// Distills two pages with the same DistillerPageWebContents and expects the
// article text to be present for /visible_style.html and absent for
// /invisible_style.html.
248 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, VisibilityDetection
) {
249 DistillerPageWebContents
distiller_page(
250 shell()->web_contents()->GetBrowserContext(),
251 shell()->web_contents()->GetContainerBounds().size(),
252 scoped_ptr
<SourcePageHandleWebContents
>());
253 distiller_page_
= &distiller_page
;
255 // visible_style.html and invisible_style.html only differ by the visibility
256 // internal stylesheet.
// First pass: the visible variant.
259 base::RunLoop run_loop
;
260 DistillPage(run_loop
.QuitClosure(), "/visible_style.html");
262 EXPECT_THAT(distiller_result_
->distilled_content().html(),
263 HasSubstr("Lorem ipsum"));
// Second pass: the invisible variant.
267 base::RunLoop run_loop
;
268 DistillPage(run_loop
.QuitClosure(), "/invisible_style.html");
270 EXPECT_THAT(distiller_result_
->distilled_content().html(),
271 Not(HasSubstr("Lorem ipsum")));
275 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
276 UsingCurrentWebContentsWrongUrl
) {
277 std::string
url("/bogus");
278 bool expect_new_web_contents
= true;
279 bool setup_main_frame_observer
= true;
280 bool wait_for_document_loaded
= true;
281 RunUseCurrentWebContentsTest(url
,
282 expect_new_web_contents
,
283 setup_main_frame_observer
,
284 wait_for_document_loaded
);
287 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
288 UsingCurrentWebContentsNoMainFrameObserver
) {
289 std::string
url(kSimpleArticlePath
);
290 bool expect_new_web_contents
= true;
291 bool setup_main_frame_observer
= false;
292 bool wait_for_document_loaded
= true;
293 RunUseCurrentWebContentsTest(url
,
294 expect_new_web_contents
,
295 setup_main_frame_observer
,
296 wait_for_document_loaded
);
299 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
300 UsingCurrentWebContentsNotFinishedLoadingYet
) {
301 std::string
url(kSimpleArticlePath
);
302 bool expect_new_web_contents
= false;
303 bool setup_main_frame_observer
= true;
304 bool wait_for_document_loaded
= false;
305 RunUseCurrentWebContentsTest(url
,
306 expect_new_web_contents
,
307 setup_main_frame_observer
,
308 wait_for_document_loaded
);
311 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
312 UsingCurrentWebContentsReadyForDistillation
) {
313 std::string
url(kSimpleArticlePath
);
314 bool expect_new_web_contents
= false;
315 bool setup_main_frame_observer
= true;
316 bool wait_for_document_loaded
= true;
317 RunUseCurrentWebContentsTest(url
,
318 expect_new_web_contents
,
319 setup_main_frame_observer
,
320 wait_for_document_loaded
);
// Shared body of the UsingCurrentWebContents* tests.
//   url: path loaded into the shell's WebContents before distillation.
//   expect_new_web_contents: whether the distiller is expected to create its
//       own WebContents instead of using the shell's.
//   setup_main_frame_observer: whether a WebContentsMainFrameObserver is
//       attached to the shell's WebContents first.
//   wait_for_document_loaded: forwarded to WebContentsMainFrameHelper.
// Distillation is always requested for kSimpleArticlePath, whatever |url| is.
323 void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest(
324 const std::string
& url
,
325 bool expect_new_web_contents
,
326 bool setup_main_frame_observer
,
327 bool wait_for_document_loaded
) {
328 content::WebContents
* current_web_contents
= shell()->web_contents();
329 if (setup_main_frame_observer
) {
330 dom_distiller::WebContentsMainFrameObserver::CreateForWebContents(
331 current_web_contents
);
// Load |url| in the shell and block until the helper quits the loop.
333 base::RunLoop url_loaded_runner
;
334 WebContentsMainFrameHelper
main_frame_loaded(current_web_contents
,
335 url_loaded_runner
.QuitClosure(),
336 wait_for_document_loaded
);
337 current_web_contents
->GetController().LoadURL(
338 embedded_test_server()->GetURL(url
),
340 ui::PAGE_TRANSITION_TYPED
,
342 url_loaded_runner
.Run();
// Wrap the shell's WebContents in a source-page handle for the distiller.
// TestDistillerPageWebContents releases the pointer again rather than
// deleting it, because the shell owns the WebContents.
344 scoped_ptr
<content::WebContents
> old_web_contents_sptr(current_web_contents
);
345 scoped_ptr
<SourcePageHandleWebContents
> source_page_handle(
346 new SourcePageHandleWebContents(old_web_contents_sptr
.Pass()));
348 TestDistillerPageWebContents
distiller_page(
349 shell()->web_contents()->GetBrowserContext(),
350 shell()->web_contents()->GetContainerBounds().size(),
351 source_page_handle
.Pass(),
352 expect_new_web_contents
);
353 distiller_page_
= &distiller_page
;
355 base::RunLoop run_loop
;
356 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
// NOTE(review): nothing visible here runs |run_loop| before distiller_result_
// is read below -- confirm the loop is spun.
359 // Sanity check of distillation process.
360 EXPECT_EQ(expect_new_web_contents
, distiller_page
.new_web_contents_created());
361 EXPECT_EQ("Test Page Title", distiller_result_
->title());
// Distills /markup_article.html and checks the markup_info section of the
// result: page-level fields, the article sub-message, and two images.
364 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, MarkupInfo
) {
365 DistillerPageWebContents
distiller_page(
366 shell()->web_contents()->GetBrowserContext(),
367 shell()->web_contents()->GetContainerBounds().size(),
368 scoped_ptr
<SourcePageHandleWebContents
>());
369 distiller_page_
= &distiller_page
;
371 base::RunLoop run_loop
;
372 DistillPage(run_loop
.QuitClosure(), "/markup_article.html");
// NOTE(review): nothing visible here runs |run_loop| before distiller_result_
// is read below -- confirm the loop is spun.
375 EXPECT_THAT(distiller_result_
->distilled_content().html(),
376 HasSubstr("Lorem ipsum"));
377 EXPECT_EQ("Marked-up Markup Test Page Title", distiller_result_
->title());
// Page-level markup fields. |markup_info| is a copy of the message.
379 const proto::MarkupInfo markup_info
= distiller_result_
->markup_info();
380 EXPECT_EQ("Marked-up Markup Test Page Title", markup_info
.title());
381 EXPECT_EQ("Article", markup_info
.type());
382 EXPECT_EQ("http://test/markup.html", markup_info
.url());
383 EXPECT_EQ("This page tests Markup Info.", markup_info
.description());
384 EXPECT_EQ("Whoever Published", markup_info
.publisher());
385 EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", markup_info
.copyright());
386 EXPECT_EQ("Whoever Authored", markup_info
.author());
// Article-specific fields.
388 const proto::MarkupArticle markup_article
= markup_info
.article();
389 EXPECT_EQ("Whatever Section", markup_article
.section());
390 EXPECT_EQ("July 23, 2014", markup_article
.published_time());
391 EXPECT_EQ("2014-07-23T23:59", markup_article
.modified_time());
392 EXPECT_EQ("", markup_article
.expiration_time());
// ASSERT rather than EXPECT: authors(0) below needs exactly this one entry.
393 ASSERT_EQ(1, markup_article
.authors_size());
394 EXPECT_EQ("Whoever Authored", markup_article
.authors(0));
// ASSERT rather than EXPECT: images(0) and images(1) are read below.
396 ASSERT_EQ(2, markup_info
.images_size());
398 const proto::MarkupImage markup_image1
= markup_info
.images(0);
399 EXPECT_EQ("http://test/markup1.jpeg", markup_image1
.url());
400 EXPECT_EQ("https://test/markup1.jpeg", markup_image1
.secure_url());
401 EXPECT_EQ("jpeg", markup_image1
.type());
402 EXPECT_EQ("", markup_image1
.caption());
403 EXPECT_EQ(600, markup_image1
.width());
404 EXPECT_EQ(400, markup_image1
.height());
406 const proto::MarkupImage markup_image2
= markup_info
.images(1);
407 EXPECT_EQ("http://test/markup2.gif", markup_image2
.url());
408 EXPECT_EQ("https://test/markup2.gif", markup_image2
.secure_url());
409 EXPECT_EQ("gif", markup_image2
.type());
410 EXPECT_EQ("", markup_image2
.caption());
411 EXPECT_EQ(1000, markup_image2
.width());
412 EXPECT_EQ(600, markup_image2
.height());
415 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
416 TestTitleAndContentAreNeverEmpty
) {
417 const std::string some_title
= "some title";
418 const std::string some_content
= "some content";
419 const std::string no_title
=
420 l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_TITLE
);
421 const std::string no_content
=
422 l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_CONTENT
);
424 { // Test non-empty title and content for article.
425 scoped_ptr
<DistilledArticleProto
> article_proto(
426 new DistilledArticleProto());
427 article_proto
->set_title(some_title
);
428 (*(article_proto
->add_pages())).set_html(some_content
);
429 std::string html
= viewer::GetUnsafeArticleHtml(article_proto
.get(),
430 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
431 EXPECT_THAT(html
, HasSubstr(some_title
));
432 EXPECT_THAT(html
, HasSubstr(some_content
));
433 EXPECT_THAT(html
, Not(HasSubstr(no_title
)));
434 EXPECT_THAT(html
, Not(HasSubstr(no_content
)));
437 { // Test empty title and content for article.
438 scoped_ptr
<DistilledArticleProto
> article_proto(
439 new DistilledArticleProto());
440 article_proto
->set_title("");
441 (*(article_proto
->add_pages())).set_html("");
442 std::string html
= viewer::GetUnsafeArticleHtml(article_proto
.get(),
443 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
444 EXPECT_THAT(html
, HasSubstr(no_title
));
445 EXPECT_THAT(html
, HasSubstr(no_content
));
446 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
447 EXPECT_THAT(html
, Not(HasSubstr(some_content
)));
450 { // Test missing title and non-empty content for article.
451 scoped_ptr
<DistilledArticleProto
> article_proto(
452 new DistilledArticleProto());
453 (*(article_proto
->add_pages())).set_html(some_content
);
454 std::string html
= viewer::GetUnsafeArticleHtml(article_proto
.get(),
455 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
456 EXPECT_THAT(html
, HasSubstr(no_title
));
457 EXPECT_THAT(html
, HasSubstr(no_content
));
458 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
459 EXPECT_THAT(html
, Not(HasSubstr(some_content
)));
462 { // Test non-empty title and missing content for article.
463 scoped_ptr
<DistilledArticleProto
> article_proto(
464 new DistilledArticleProto());
465 article_proto
->set_title(some_title
);
466 std::string html
= viewer::GetUnsafeArticleHtml(article_proto
.get(),
467 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
468 EXPECT_THAT(html
, HasSubstr(no_title
));
469 EXPECT_THAT(html
, HasSubstr(no_content
));
470 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
471 EXPECT_THAT(html
, Not(HasSubstr(some_content
)));
474 { // Test non-empty title and content for page.
475 scoped_ptr
<DistilledPageProto
> page_proto(new DistilledPageProto());
476 page_proto
->set_title(some_title
);
477 page_proto
->set_html(some_content
);
478 std::string html
= viewer::GetUnsafePartialArticleHtml(page_proto
.get(),
479 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
480 EXPECT_THAT(html
, HasSubstr(some_title
));
481 EXPECT_THAT(html
, HasSubstr(some_content
));
482 EXPECT_THAT(html
, Not(HasSubstr(no_title
)));
483 EXPECT_THAT(html
, Not(HasSubstr(no_content
)));
486 { // Test empty title and content for page.
487 scoped_ptr
<DistilledPageProto
> page_proto(new DistilledPageProto());
488 page_proto
->set_title("");
489 page_proto
->set_html("");
490 std::string html
= viewer::GetUnsafePartialArticleHtml(page_proto
.get(),
491 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
492 EXPECT_THAT(html
, HasSubstr(no_title
));
493 EXPECT_THAT(html
, HasSubstr(no_content
));
494 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
495 EXPECT_THAT(html
, Not(HasSubstr(some_content
)));
498 { // Test missing title and non-empty content for page.
499 scoped_ptr
<DistilledPageProto
> page_proto(new DistilledPageProto());
500 page_proto
->set_html(some_content
);
501 std::string html
= viewer::GetUnsafePartialArticleHtml(page_proto
.get(),
502 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
503 EXPECT_THAT(html
, HasSubstr(no_title
));
504 EXPECT_THAT(html
, HasSubstr(some_content
));
505 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
506 EXPECT_THAT(html
, Not(HasSubstr(no_content
)));
509 { // Test non-empty title and missing content for page.
510 scoped_ptr
<DistilledPageProto
> page_proto(new DistilledPageProto());
511 page_proto
->set_title(some_title
);
512 std::string html
= viewer::GetUnsafePartialArticleHtml(page_proto
.get(),
513 DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
514 EXPECT_THAT(html
, HasSubstr(some_title
));
515 EXPECT_THAT(html
, HasSubstr(no_content
));
516 EXPECT_THAT(html
, Not(HasSubstr(no_title
)));
517 EXPECT_THAT(html
, Not(HasSubstr(some_content
)));
521 } // namespace dom_distiller