1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
#include "base/bind.h"
#include "base/memory/weak_ptr.h"
#include "base/path_service.h"
#include "base/run_loop.h"
#include "base/values.h"
#include "components/dom_distiller/content/distiller_page_web_contents.h"
#include "components/dom_distiller/content/web_contents_main_frame_observer.h"
#include "components/dom_distiller/core/distiller_page.h"
#include "components/dom_distiller/core/proto/distilled_article.pb.h"
#include "components/dom_distiller/core/proto/distilled_page.pb.h"
#include "components/dom_distiller/core/viewer.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/navigation_controller.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/web_contents_observer.h"
#include "content/public/common/referrer.h"
#include "content/public/test/content_browser_test.h"
#include "content/shell/browser/shell.h"
#include "grit/components_strings.h"
#include "net/test/embedded_test_server/embedded_test_server.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "third_party/dom_distiller_js/dom_distiller.pb.h"
#include "ui/base/l10n/l10n_util.h"
#include "ui/base/resource/resource_bundle.h"
#include "url/gurl.h"
28 using content::ContentBrowserTest
;
29 using testing::ContainsRegex
;
30 using testing::HasSubstr
;
35 // Helper class to know how far in the loading process the current WebContents
36 // has come. It will call the callback either after
37 // DidCommitProvisionalLoadForFrame or DocumentLoadedInFrame is called for the
38 // main frame, based on the value of |wait_for_document_loaded|.
39 class WebContentsMainFrameHelper
: public content::WebContentsObserver
{
41 WebContentsMainFrameHelper(content::WebContents
* web_contents
,
42 const base::Closure
& callback
,
43 bool wait_for_document_loaded
)
44 : WebContentsObserver(web_contents
),
46 wait_for_document_loaded_(wait_for_document_loaded
) {}
48 void DidCommitProvisionalLoadForFrame(
49 content::RenderFrameHost
* render_frame_host
,
51 ui::PageTransition transition_type
) override
{
52 if (wait_for_document_loaded_
)
54 if (!render_frame_host
->GetParent())
58 void DocumentLoadedInFrame(
59 content::RenderFrameHost
* render_frame_host
) override
{
60 if (wait_for_document_loaded_
) {
61 if (!render_frame_host
->GetParent())
67 base::Closure callback_
;
68 bool wait_for_document_loaded_
;
73 namespace dom_distiller
{
// Paths, relative to the embedded test server root, of the articles used by
// the tests below. Declared as arrays so that neither the characters nor the
// binding itself can be modified (a plain `const char*` global is a mutable
// pointer).
const char kSimpleArticlePath[] = "/simple_article.html";
const char kVideoArticlePath[] = "/video_article.html";
78 class DistillerPageWebContentsTest
: public ContentBrowserTest
{
80 // ContentBrowserTest:
81 void SetUpOnMainThread() override
{
82 AddComponentsResources();
84 ContentBrowserTest::SetUpOnMainThread();
87 void DistillPage(const base::Closure
& quit_closure
, const std::string
& url
) {
88 quit_closure_
= quit_closure
;
89 distiller_page_
->DistillPage(
90 embedded_test_server()->GetURL(url
),
91 dom_distiller::proto::DomDistillerOptions(),
92 base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished
,
96 void OnPageDistillationFinished(
97 scoped_ptr
<proto::DomDistillerResult
> distiller_result
,
98 bool distillation_successful
) {
99 distiller_result_
= distiller_result
.Pass();
104 void AddComponentsResources() {
105 base::FilePath pak_file
;
106 base::FilePath pak_dir
;
107 #if defined(OS_ANDROID)
108 CHECK(PathService::Get(base::DIR_ANDROID_APP_DATA
, &pak_dir
));
109 pak_dir
= pak_dir
.Append(FILE_PATH_LITERAL("paks"));
111 PathService::Get(base::DIR_MODULE
, &pak_dir
);
114 pak_dir
.Append(FILE_PATH_LITERAL("components_tests_resources.pak"));
115 ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
116 pak_file
, ui::SCALE_FACTOR_NONE
);
119 void SetUpTestServer() {
121 PathService::Get(base::DIR_SOURCE_ROOT
, &path
);
122 path
= path
.AppendASCII("components/test/data/dom_distiller");
123 embedded_test_server()->ServeFilesFromDirectory(path
);
124 ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
128 void RunUseCurrentWebContentsTest(const std::string
& url
,
129 bool expect_new_web_contents
,
130 bool setup_main_frame_observer
,
131 bool wait_for_document_loaded
);
133 DistillerPageWebContents
* distiller_page_
;
134 base::Closure quit_closure_
;
135 scoped_ptr
<proto::DomDistillerResult
> distiller_result_
;
138 // Use this class to be able to leak the WebContents, which is needed for when
139 // the current WebContents is used for distillation.
140 class TestDistillerPageWebContents
: public DistillerPageWebContents
{
142 TestDistillerPageWebContents(
143 content::BrowserContext
* browser_context
,
144 const gfx::Size
& render_view_size
,
145 scoped_ptr
<SourcePageHandleWebContents
> optional_web_contents_handle
,
146 bool expect_new_web_contents
)
147 : DistillerPageWebContents(browser_context
, render_view_size
,
148 optional_web_contents_handle
.Pass()),
149 expect_new_web_contents_(expect_new_web_contents
),
150 new_web_contents_created_(false) {}
152 void CreateNewWebContents(const GURL
& url
) override
{
153 ASSERT_EQ(true, expect_new_web_contents_
);
154 new_web_contents_created_
= true;
155 // DistillerPageWebContents::CreateNewWebContents resets the scoped_ptr to
156 // the WebContents, so intentionally leak WebContents here, since it is
157 // owned by the shell.
158 content::WebContents
* web_contents
= web_contents_
.release();
159 web_contents
->GetLastCommittedURL();
160 DistillerPageWebContents::CreateNewWebContents(url
);
163 ~TestDistillerPageWebContents() override
{
164 if (!expect_new_web_contents_
) {
165 // Intentionally leaking WebContents, since it is owned by the shell.
166 content::WebContents
* web_contents
= web_contents_
.release();
167 web_contents
->GetLastCommittedURL();
171 bool new_web_contents_created() { return new_web_contents_created_
; }
174 bool expect_new_web_contents_
;
175 bool new_web_contents_created_
;
178 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, BasicDistillationWorks
) {
179 DistillerPageWebContents
distiller_page(
180 shell()->web_contents()->GetBrowserContext(),
181 shell()->web_contents()->GetContainerBounds().size(),
182 scoped_ptr
<SourcePageHandleWebContents
>());
183 distiller_page_
= &distiller_page
;
185 base::RunLoop run_loop
;
186 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
189 EXPECT_EQ("Test Page Title", distiller_result_
->title());
190 EXPECT_THAT(distiller_result_
->distilled_content().html(),
191 HasSubstr("Lorem ipsum"));
192 EXPECT_THAT(distiller_result_
->distilled_content().html(),
193 Not(HasSubstr("questionable content")));
194 EXPECT_EQ("", distiller_result_
->pagination_info().next_page());
195 EXPECT_EQ("", distiller_result_
->pagination_info().prev_page());
198 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeLinks
) {
199 DistillerPageWebContents
distiller_page(
200 shell()->web_contents()->GetBrowserContext(),
201 shell()->web_contents()->GetContainerBounds().size(),
202 scoped_ptr
<SourcePageHandleWebContents
>());
203 distiller_page_
= &distiller_page
;
205 base::RunLoop run_loop
;
206 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
209 // A relative link should've been updated.
210 EXPECT_THAT(distiller_result_
->distilled_content().html(),
211 ContainsRegex("href=\"http://127.0.0.1:.*/relativelink.html\""));
212 EXPECT_THAT(distiller_result_
->distilled_content().html(),
213 HasSubstr("href=\"http://www.google.com/absolutelink.html\""));
216 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeImages
) {
217 DistillerPageWebContents
distiller_page(
218 shell()->web_contents()->GetBrowserContext(),
219 shell()->web_contents()->GetContainerBounds().size(),
220 scoped_ptr
<SourcePageHandleWebContents
>());
221 distiller_page_
= &distiller_page
;
223 base::RunLoop run_loop
;
224 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
227 // A relative link should've been updated.
228 EXPECT_THAT(distiller_result_
->distilled_content().html(),
229 ContainsRegex("src=\"http://127.0.0.1:.*/relativeimage.png\""));
230 EXPECT_THAT(distiller_result_
->distilled_content().html(),
231 HasSubstr("src=\"http://www.google.com/absoluteimage.png\""));
235 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeVideos
) {
236 DistillerPageWebContents
distiller_page(
237 shell()->web_contents()->GetBrowserContext(),
238 shell()->web_contents()->GetContainerBounds().size(),
239 scoped_ptr
<SourcePageHandleWebContents
>());
240 distiller_page_
= &distiller_page
;
242 base::RunLoop run_loop
;
243 DistillPage(run_loop
.QuitClosure(), kVideoArticlePath
);
246 // A relative source/track should've been updated.
247 EXPECT_THAT(distiller_result_
->distilled_content().html(),
248 ContainsRegex("src=\"http://127.0.0.1:.*/relative_video.mp4\""));
250 distiller_result_
->distilled_content().html(),
251 ContainsRegex("src=\"http://127.0.0.1:.*/relative_track_en.vtt\""));
252 EXPECT_THAT(distiller_result_
->distilled_content().html(),
253 HasSubstr("src=\"http://www.google.com/absolute_video.ogg\""));
254 EXPECT_THAT(distiller_result_
->distilled_content().html(),
255 HasSubstr("src=\"http://www.google.com/absolute_track_fr.vtt\""));
258 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, VisibilityDetection
) {
259 DistillerPageWebContents
distiller_page(
260 shell()->web_contents()->GetBrowserContext(),
261 shell()->web_contents()->GetContainerBounds().size(),
262 scoped_ptr
<SourcePageHandleWebContents
>());
263 distiller_page_
= &distiller_page
;
265 // visble_style.html and invisible_style.html only differ by the visibility
266 // internal stylesheet.
269 base::RunLoop run_loop
;
270 DistillPage(run_loop
.QuitClosure(), "/visible_style.html");
272 EXPECT_THAT(distiller_result_
->distilled_content().html(),
273 HasSubstr("Lorem ipsum"));
277 base::RunLoop run_loop
;
278 DistillPage(run_loop
.QuitClosure(), "/invisible_style.html");
280 EXPECT_THAT(distiller_result_
->distilled_content().html(),
281 Not(HasSubstr("Lorem ipsum")));
285 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
286 UsingCurrentWebContentsWrongUrl
) {
287 std::string
url("/bogus");
288 bool expect_new_web_contents
= true;
289 bool setup_main_frame_observer
= true;
290 bool wait_for_document_loaded
= true;
291 RunUseCurrentWebContentsTest(url
,
292 expect_new_web_contents
,
293 setup_main_frame_observer
,
294 wait_for_document_loaded
);
297 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
298 UsingCurrentWebContentsNoMainFrameObserver
) {
299 std::string
url(kSimpleArticlePath
);
300 bool expect_new_web_contents
= true;
301 bool setup_main_frame_observer
= false;
302 bool wait_for_document_loaded
= true;
303 RunUseCurrentWebContentsTest(url
,
304 expect_new_web_contents
,
305 setup_main_frame_observer
,
306 wait_for_document_loaded
);
309 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
310 UsingCurrentWebContentsNotFinishedLoadingYet
) {
311 std::string
url(kSimpleArticlePath
);
312 bool expect_new_web_contents
= false;
313 bool setup_main_frame_observer
= true;
314 bool wait_for_document_loaded
= false;
315 RunUseCurrentWebContentsTest(url
,
316 expect_new_web_contents
,
317 setup_main_frame_observer
,
318 wait_for_document_loaded
);
321 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
322 UsingCurrentWebContentsReadyForDistillation
) {
323 std::string
url(kSimpleArticlePath
);
324 bool expect_new_web_contents
= false;
325 bool setup_main_frame_observer
= true;
326 bool wait_for_document_loaded
= true;
327 RunUseCurrentWebContentsTest(url
,
328 expect_new_web_contents
,
329 setup_main_frame_observer
,
330 wait_for_document_loaded
);
333 void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest(
334 const std::string
& url
,
335 bool expect_new_web_contents
,
336 bool setup_main_frame_observer
,
337 bool wait_for_document_loaded
) {
338 content::WebContents
* current_web_contents
= shell()->web_contents();
339 if (setup_main_frame_observer
) {
340 dom_distiller::WebContentsMainFrameObserver::CreateForWebContents(
341 current_web_contents
);
343 base::RunLoop url_loaded_runner
;
344 WebContentsMainFrameHelper
main_frame_loaded(current_web_contents
,
345 url_loaded_runner
.QuitClosure(),
346 wait_for_document_loaded
);
347 current_web_contents
->GetController().LoadURL(
348 embedded_test_server()->GetURL(url
),
350 ui::PAGE_TRANSITION_TYPED
,
352 url_loaded_runner
.Run();
354 scoped_ptr
<content::WebContents
> old_web_contents_sptr(current_web_contents
);
355 scoped_ptr
<SourcePageHandleWebContents
> source_page_handle(
356 new SourcePageHandleWebContents(old_web_contents_sptr
.Pass()));
358 TestDistillerPageWebContents
distiller_page(
359 shell()->web_contents()->GetBrowserContext(),
360 shell()->web_contents()->GetContainerBounds().size(),
361 source_page_handle
.Pass(),
362 expect_new_web_contents
);
363 distiller_page_
= &distiller_page
;
365 base::RunLoop run_loop
;
366 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
369 // Sanity check of distillation process.
370 EXPECT_EQ(expect_new_web_contents
, distiller_page
.new_web_contents_created());
371 EXPECT_EQ("Test Page Title", distiller_result_
->title());
374 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, MarkupInfo
) {
375 DistillerPageWebContents
distiller_page(
376 shell()->web_contents()->GetBrowserContext(),
377 shell()->web_contents()->GetContainerBounds().size(),
378 scoped_ptr
<SourcePageHandleWebContents
>());
379 distiller_page_
= &distiller_page
;
381 base::RunLoop run_loop
;
382 DistillPage(run_loop
.QuitClosure(), "/markup_article.html");
385 EXPECT_THAT(distiller_result_
->distilled_content().html(),
386 HasSubstr("Lorem ipsum"));
387 EXPECT_EQ("Marked-up Markup Test Page Title", distiller_result_
->title());
389 const proto::MarkupInfo markup_info
= distiller_result_
->markup_info();
390 EXPECT_EQ("Marked-up Markup Test Page Title", markup_info
.title());
391 EXPECT_EQ("Article", markup_info
.type());
392 EXPECT_EQ("http://test/markup.html", markup_info
.url());
393 EXPECT_EQ("This page tests Markup Info.", markup_info
.description());
394 EXPECT_EQ("Whoever Published", markup_info
.publisher());
395 EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", markup_info
.copyright());
396 EXPECT_EQ("Whoever Authored", markup_info
.author());
398 const proto::MarkupArticle markup_article
= markup_info
.article();
399 EXPECT_EQ("Whatever Section", markup_article
.section());
400 EXPECT_EQ("July 23, 2014", markup_article
.published_time());
401 EXPECT_EQ("2014-07-23T23:59", markup_article
.modified_time());
402 EXPECT_EQ("", markup_article
.expiration_time());
403 ASSERT_EQ(1, markup_article
.authors_size());
404 EXPECT_EQ("Whoever Authored", markup_article
.authors(0));
406 ASSERT_EQ(2, markup_info
.images_size());
408 const proto::MarkupImage markup_image1
= markup_info
.images(0);
409 EXPECT_EQ("http://test/markup1.jpeg", markup_image1
.url());
410 EXPECT_EQ("https://test/markup1.jpeg", markup_image1
.secure_url());
411 EXPECT_EQ("jpeg", markup_image1
.type());
412 EXPECT_EQ("", markup_image1
.caption());
413 EXPECT_EQ(600, markup_image1
.width());
414 EXPECT_EQ(400, markup_image1
.height());
416 const proto::MarkupImage markup_image2
= markup_info
.images(1);
417 EXPECT_EQ("http://test/markup2.gif", markup_image2
.url());
418 EXPECT_EQ("https://test/markup2.gif", markup_image2
.secure_url());
419 EXPECT_EQ("gif", markup_image2
.type());
420 EXPECT_EQ("", markup_image2
.caption());
421 EXPECT_EQ(1000, markup_image2
.width());
422 EXPECT_EQ(600, markup_image2
.height());
425 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, TestTitleNeverEmpty
) {
426 const std::string some_title
= "some title";
427 const std::string no_title
=
428 l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_TITLE
);
430 { // Test empty title for article.
431 scoped_ptr
<DistilledArticleProto
> article_proto(
432 new DistilledArticleProto());
433 article_proto
->set_title("");
434 (*(article_proto
->add_pages())).set_html("");
435 std::string html
= viewer::GetUnsafeArticleTemplateHtml(
436 &article_proto
.get()->pages(0), DistilledPagePrefs::LIGHT
,
437 DistilledPagePrefs::SERIF
);
438 EXPECT_THAT(html
, HasSubstr(no_title
));
439 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
442 { // Test empty title for page.
443 scoped_ptr
<DistilledPageProto
> page_proto(new DistilledPageProto());
444 page_proto
->set_title("");
445 page_proto
->set_html("");
446 std::string html
= viewer::GetUnsafeArticleTemplateHtml(
447 page_proto
.get(), DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
448 EXPECT_THAT(html
, HasSubstr(no_title
));
449 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
452 { // Test missing title for page.
453 scoped_ptr
<DistilledPageProto
> page_proto(new DistilledPageProto());
454 std::string html
= viewer::GetUnsafeArticleTemplateHtml(
455 page_proto
.get(), DistilledPagePrefs::LIGHT
, DistilledPagePrefs::SERIF
);
456 EXPECT_THAT(html
, HasSubstr(no_title
));
457 EXPECT_THAT(html
, Not(HasSubstr(some_title
)));
461 } // namespace dom_distiller