1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/memory/weak_ptr.h"
6 #include "base/path_service.h"
7 #include "base/run_loop.h"
8 #include "base/values.h"
9 #include "components/dom_distiller/content/distiller_page_web_contents.h"
10 #include "components/dom_distiller/content/web_contents_main_frame_observer.h"
11 #include "components/dom_distiller/core/distiller_page.h"
12 #include "content/public/browser/browser_context.h"
13 #include "content/public/browser/navigation_controller.h"
14 #include "content/public/browser/render_frame_host.h"
15 #include "content/public/browser/web_contents_observer.h"
16 #include "content/public/test/content_browser_test.h"
17 #include "content/shell/browser/shell.h"
18 #include "grit/component_resources.h"
19 #include "net/test/embedded_test_server/embedded_test_server.h"
20 #include "testing/gmock/include/gmock/gmock.h"
21 #include "ui/base/resource/resource_bundle.h"
23 using content::ContentBrowserTest
;
24 using testing::ContainsRegex
;
25 using testing::HasSubstr
;
28 namespace dom_distiller
{
// Paths of the test articles, as passed to embedded_test_server()->GetURL().
//
// Declared as constant arrays rather than `const char*`: a `const char*` at
// namespace scope is a mutable pointer with external linkage, whereas a
// `const char[]` is a true read-only constant. All uses convert the value to
// std::string, so callers are unaffected.
const char kSimpleArticlePath[] = "/simple_article.html";
const char kVideoArticlePath[] = "/video_article.html";
// Browser-test fixture for DistillerPageWebContents. A test points
// |distiller_page_| at the page object under test, calls DistillPage(), and
// then inspects the result stored in |page_info_|.
33 class DistillerPageWebContentsTest
: public ContentBrowserTest
{
35 // ContentBrowserTest:
// Registers the components resource pak, then delegates to the base class
// set-up.
36 virtual void SetUpOnMainThread() OVERRIDE
{
37 AddComponentsResources();
39 ContentBrowserTest::SetUpOnMainThread();
// Stores |quit_closure| and starts distilling |url|, resolved against the
// embedded test server, with default-constructed DomDistillerOptions.
// OnPageDistillationFinished is bound as the completion callback.
42 void DistillPage(const base::Closure
& quit_closure
, const std::string
& url
) {
43 quit_closure_
= quit_closure
;
44 distiller_page_
->DistillPage(
45 embedded_test_server()->GetURL(url
),
46 dom_distiller::proto::DomDistillerOptions(),
47 base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished
,
// Completion callback: takes ownership of |distilled_page| so the test body
// can inspect it through |page_info_|. |distillation_successful| is not
// consulted in the code visible here.
51 void OnPageDistillationFinished(scoped_ptr
<DistilledPageInfo
> distilled_page
,
52 bool distillation_successful
) {
53 page_info_
= distilled_page
.Pass();
// Adds components_resources.pak, looked up in base::DIR_MODULE, to the shared
// ResourceBundle.
58 void AddComponentsResources() {
59 base::FilePath pak_file
;
60 base::FilePath pak_dir
;
61 PathService::Get(base::DIR_MODULE
, &pak_dir
);
62 pak_file
= pak_dir
.Append(FILE_PATH_LITERAL("components_resources.pak"));
63 ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
64 pak_file
, ui::SCALE_FACTOR_NONE
);
// Makes the embedded test server serve components/test/data/dom_distiller
// (under base::DIR_SOURCE_ROOT) and asserts that the server becomes ready.
67 void SetUpTestServer() {
69 PathService::Get(base::DIR_SOURCE_ROOT
, &path
);
70 path
= path
.AppendASCII("components/test/data/dom_distiller");
71 embedded_test_server()->ServeFilesFromDirectory(path
);
72 ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
// Shared body of the UsingCurrentWebContents* tests; defined out of line.
//   url: path loaded into the shell's WebContents before distilling.
//   expect_new_web_contents: whether the distiller should create its own
//       WebContents instead of reusing the shell's.
//   setup_main_frame_observer: whether a WebContentsMainFrameObserver is
//       attached to the shell's WebContents first.
//   wait_for_document_loaded: forwarded to WebContentsMainFrameHelper.
76 void RunUseCurrentWebContentsTest(const std::string
& url
,
77 bool expect_new_web_contents
,
78 bool setup_main_frame_observer
,
79 bool wait_for_document_loaded
);
// Page under test. Not owned: each test assigns the address of a
// stack-allocated page object.
81 DistillerPageWebContents
* distiller_page_
;
// Closure supplied by the most recent DistillPage() call.
82 base::Closure quit_closure_
;
// Result of the most recent distillation.
83 scoped_ptr
<DistilledPageInfo
> page_info_
;
86 // Use this class to be able to leak the WebContents, which is needed for when
87 // the current WebContents is used for distillation.
// It also records whether CreateNewWebContents() was invoked so a test can
// compare that against its expectation.
88 class TestDistillerPageWebContents
: public DistillerPageWebContents
{
// Forwards |browser_context|, |render_view_size| and the optional source page
// handle to the base class. |expect_new_web_contents| states whether the test
// expects CreateNewWebContents() to be called.
90 TestDistillerPageWebContents(
91 content::BrowserContext
* browser_context
,
92 const gfx::Size
& render_view_size
,
93 scoped_ptr
<SourcePageHandleWebContents
> optional_web_contents_handle
,
94 bool expect_new_web_contents
)
95 : DistillerPageWebContents(browser_context
, render_view_size
,
96 optional_web_contents_handle
.Pass()),
97 expect_new_web_contents_(expect_new_web_contents
),
98 new_web_contents_created_(false) {}
// Asserts that a new WebContents was expected, records that one was
// requested, releases the currently held WebContents without deleting it, and
// then defers to the base implementation.
100 virtual void CreateNewWebContents(const GURL
& url
) OVERRIDE
{
101 ASSERT_EQ(true, expect_new_web_contents_
);
102 new_web_contents_created_
= true;
103 // DistillerPageWebContents::CreateNewWebContents resets the scoped_ptr to
104 // the WebContents, so intentionally leak WebContents here, since it is
105 // owned by the shell.
106 content::WebContents
* web_contents
= web_contents_
.release();
// The result of this call is discarded; presumably it exists only so that the
// released pointer is not an unused variable -- TODO confirm.
107 web_contents
->GetLastCommittedURL();
108 DistillerPageWebContents::CreateNewWebContents(url
);
// When no new WebContents was expected, |web_contents_| is released here
// rather than left for the scoped_ptr to delete.
111 virtual ~TestDistillerPageWebContents() {
112 if (!expect_new_web_contents_
) {
113 // Intentionally leaking WebContents, since it is owned by the shell.
114 content::WebContents
* web_contents
= web_contents_
.release();
115 web_contents
->GetLastCommittedURL();
// True once CreateNewWebContents() has been called on this object.
119 bool new_web_contents_created() { return new_web_contents_created_
; }
// Expectation supplied by the test at construction time.
122 bool expect_new_web_contents_
;
// Initialised to false; set to true in CreateNewWebContents().
123 bool new_web_contents_created_
;
126 // Helper class to know how far in the loading process the current WebContents
127 // has come. It will call the callback either after
128 // DidCommitProvisionalLoadForFrame or DocumentLoadedInFrame is called for the
129 // main frame, based on the value of |wait_for_document_loaded|.
130 class WebContentsMainFrameHelper
: public content::WebContentsObserver
{
// Starts observing |web_contents|. |wait_for_document_loaded| selects which of
// the two notifications below is acted on.
132 WebContentsMainFrameHelper(content::WebContents
* web_contents
,
133 const base::Closure
& callback
,
134 bool wait_for_document_loaded
)
135 : WebContentsObserver(web_contents
),
137 wait_for_document_loaded_(wait_for_document_loaded
) {}
// Commit notification. A frame is treated as the main frame when it has no
// parent. NOTE(review): per the class comment, this is the notification that
// triggers the callback when |wait_for_document_loaded_| is false -- confirm.
139 virtual void DidCommitProvisionalLoadForFrame(
140 content::RenderFrameHost
* render_frame_host
,
142 content::PageTransition transition_type
) OVERRIDE
{
143 if (wait_for_document_loaded_
)
145 if (!render_frame_host
->GetParent())
// Document-loaded notification. Only considered when
// |wait_for_document_loaded_| is set, and only for a frame without a parent.
149 virtual void DocumentLoadedInFrame(
150 content::RenderFrameHost
* render_frame_host
) OVERRIDE
{
151 if (wait_for_document_loaded_
) {
152 if (!render_frame_host
->GetParent())
// Closure to invoke once the selected load stage has been reached (see the
// class comment).
158 base::Closure callback_
;
// Selects between the commit and the document-loaded notification.
159 bool wait_for_document_loaded_
;
// Distills kSimpleArticlePath with a page that is given no source page handle
// (an empty scoped_ptr), then checks the distilled title, the article text and
// that the next/previous page URLs are empty.
162 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, BasicDistillationWorks
) {
163 DistillerPageWebContents
distiller_page(
164 shell()->web_contents()->GetBrowserContext(),
165 shell()->web_contents()->GetContainerBounds().size(),
166 scoped_ptr
<SourcePageHandleWebContents
>());
167 distiller_page_
= &distiller_page
;
169 base::RunLoop run_loop
;
170 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
173 EXPECT_EQ("Test Page Title", page_info_
.get()->title
);
174 EXPECT_THAT(page_info_
.get()->html
, HasSubstr("Lorem ipsum"));
// Text that must not survive distillation.
175 EXPECT_THAT(page_info_
.get()->html
, Not(HasSubstr("questionable content")));
176 EXPECT_EQ("", page_info_
.get()->next_page_url
);
177 EXPECT_EQ("", page_info_
.get()->prev_page_url
);
// Distills kSimpleArticlePath and checks the href attributes in the output: a
// relative link must appear as an absolute URL on the test server host
// (127.0.0.1, any port) and an absolute link must be present as written.
180 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeLinks
) {
181 DistillerPageWebContents
distiller_page(
182 shell()->web_contents()->GetBrowserContext(),
183 shell()->web_contents()->GetContainerBounds().size(),
184 scoped_ptr
<SourcePageHandleWebContents
>());
185 distiller_page_
= &distiller_page
;
187 base::RunLoop run_loop
;
188 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
191 // A relative link should've been updated.
192 EXPECT_THAT(page_info_
.get()->html
,
193 ContainsRegex("href=\"http://127.0.0.1:.*/relativelink.html\""));
// The absolute link is expected verbatim.
194 EXPECT_THAT(page_info_
.get()->html
,
195 HasSubstr("href=\"http://www.google.com/absolutelink.html\""));
// Distills kSimpleArticlePath and checks the image src attributes in the
// output: a relative source must appear as an absolute URL on the test server
// host (127.0.0.1, any port) and an absolute source must be present as
// written.
198 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeImages
) {
199 DistillerPageWebContents
distiller_page(
200 shell()->web_contents()->GetBrowserContext(),
201 shell()->web_contents()->GetContainerBounds().size(),
202 scoped_ptr
<SourcePageHandleWebContents
>());
203 distiller_page_
= &distiller_page
;
205 base::RunLoop run_loop
;
206 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
209 // A relative image source should've been updated.
210 EXPECT_THAT(page_info_
.get()->html
,
211 ContainsRegex("src=\"http://127.0.0.1:.*/relativeimage.png\""));
// The absolute image source is expected verbatim.
212 EXPECT_THAT(page_info_
.get()->html
,
213 HasSubstr("src=\"http://www.google.com/absoluteimage.png\""));
// Distills kVideoArticlePath. The matchers below look for relative video and
// track sources rewritten to absolute URLs on the test server host
// (127.0.0.1, any port), and for absolute sources present as written.
217 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, HandlesRelativeVideos
) {
218 DistillerPageWebContents
distiller_page(
219 shell()->web_contents()->GetBrowserContext(),
220 shell()->web_contents()->GetContainerBounds().size(),
221 scoped_ptr
<SourcePageHandleWebContents
>());
222 distiller_page_
= &distiller_page
;
224 base::RunLoop run_loop
;
225 DistillPage(run_loop
.QuitClosure(), kVideoArticlePath
);
228 // A relative source/track should've been updated.
230 page_info_
.get()->html
,
231 ContainsRegex("src=\"http://127.0.0.1:.*/relative_video.mp4\""));
233 page_info_
.get()->html
,
234 ContainsRegex("src=\"http://127.0.0.1:.*/relative_track_en.vtt\""));
// Absolute video and track sources are matched verbatim.
236 page_info_
.get()->html
,
237 HasSubstr("src=\"http://www.google.com/absolute_video.ogg\""));
239 page_info_
.get()->html
,
240 HasSubstr("src=\"http://www.google.com/absolute_track_fr.vtt\""));
// Distills two pages with the same page object: the article text must be
// present in the output for /visible_style.html and absent from the output
// for /invisible_style.html.
243 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, VisibilityDetection
) {
244 DistillerPageWebContents
distiller_page(
245 shell()->web_contents()->GetBrowserContext(),
246 shell()->web_contents()->GetContainerBounds().size(),
247 scoped_ptr
<SourcePageHandleWebContents
>());
248 distiller_page_
= &distiller_page
;
250 // visible_style.html and invisible_style.html only differ by the visibility
251 // internal stylesheet.
// First pass: the visible variant.
254 base::RunLoop run_loop
;
255 DistillPage(run_loop
.QuitClosure(), "/visible_style.html");
257 EXPECT_THAT(page_info_
.get()->html
, HasSubstr("Lorem ipsum"));
// Second pass: the invisible variant, using a fresh RunLoop.
261 base::RunLoop run_loop
;
262 DistillPage(run_loop
.QuitClosure(), "/invisible_style.html");
264 EXPECT_THAT(page_info_
.get()->html
, Not(HasSubstr("Lorem ipsum")));
// The shell's WebContents is navigated to "/bogus", while
// RunUseCurrentWebContentsTest always distills kSimpleArticlePath. Because the
// URLs differ, the distiller is expected to create a new WebContents.
268 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
269 UsingCurrentWebContentsWrongUrl
) {
270 std::string
url("/bogus");
271 bool expect_new_web_contents
= true;
272 bool setup_main_frame_observer
= true;
273 bool wait_for_document_loaded
= true;
274 RunUseCurrentWebContentsTest(url
,
275 expect_new_web_contents
,
276 setup_main_frame_observer
,
277 wait_for_document_loaded
);
// The shell's WebContents loads the article that is then distilled, but no
// WebContentsMainFrameObserver is attached to it. In that case the distiller
// is expected to create a new WebContents.
280 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
281 UsingCurrentWebContentsNoMainFrameObserver
) {
282 std::string
url(kSimpleArticlePath
);
283 bool expect_new_web_contents
= true;
284 bool setup_main_frame_observer
= false;
285 bool wait_for_document_loaded
= true;
286 RunUseCurrentWebContentsTest(url
,
287 expect_new_web_contents
,
288 setup_main_frame_observer
,
289 wait_for_document_loaded
);
// The shell's WebContents has a main frame observer and loads the article that
// is then distilled, but the test does not wait for the document-loaded
// notification before distilling. The distiller is still expected to reuse
// the shell's WebContents rather than create a new one.
292 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
293 UsingCurrentWebContentsNotFinishedLoadingYet
) {
294 std::string
url(kSimpleArticlePath
);
295 bool expect_new_web_contents
= false;
296 bool setup_main_frame_observer
= true;
297 bool wait_for_document_loaded
= false;
298 RunUseCurrentWebContentsTest(url
,
299 expect_new_web_contents
,
300 setup_main_frame_observer
,
301 wait_for_document_loaded
);
// The shell's WebContents has a main frame observer, loads the article that is
// then distilled, and the test waits for the document-loaded notification.
// The distiller is expected to reuse the shell's WebContents.
304 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
,
305 UsingCurrentWebContentsReadyForDistillation
) {
306 std::string
url(kSimpleArticlePath
);
307 bool expect_new_web_contents
= false;
308 bool setup_main_frame_observer
= true;
309 bool wait_for_document_loaded
= true;
310 RunUseCurrentWebContentsTest(url
,
311 expect_new_web_contents
,
312 setup_main_frame_observer
,
313 wait_for_document_loaded
);
// Shared body of the UsingCurrentWebContents* tests.
//
// Loads |url| into the shell's WebContents, hands that WebContents to a
// TestDistillerPageWebContents as its source page handle, distills
// kSimpleArticlePath, and checks whether a new WebContents was created.
//
//   url: path, resolved against the embedded test server, that the shell's
//       WebContents is navigated to before distillation.
//   expect_new_web_contents: expected value of
//       TestDistillerPageWebContents::new_web_contents_created().
//   setup_main_frame_observer: when true, a WebContentsMainFrameObserver is
//       created for the shell's WebContents before the navigation.
//   wait_for_document_loaded: forwarded to WebContentsMainFrameHelper, which
//       decides how far the load must progress before distillation starts.
316 void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest(
317 const std::string
& url
,
318 bool expect_new_web_contents
,
319 bool setup_main_frame_observer
,
320 bool wait_for_document_loaded
) {
321 content::WebContents
* current_web_contents
= shell()->web_contents();
322 if (setup_main_frame_observer
) {
323 dom_distiller::WebContentsMainFrameObserver::CreateForWebContents(
324 current_web_contents
);
// Navigate the shell's WebContents and block until the helper reports that
// the requested load stage has been reached.
326 base::RunLoop url_loaded_runner
;
327 WebContentsMainFrameHelper
main_frame_loaded(current_web_contents
,
328 url_loaded_runner
.QuitClosure(),
329 wait_for_document_loaded
);
330 current_web_contents
->GetController().LoadURL(
331 embedded_test_server()->GetURL(url
),
333 content::PAGE_TRANSITION_TYPED
,
335 url_loaded_runner
.Run();
// Wrap the shell's WebContents in a scoped_ptr so it can be passed on as a
// SourcePageHandleWebContents. TestDistillerPageWebContents releases the
// pointer again instead of deleting it, because the shell owns the
// WebContents.
337 scoped_ptr
<content::WebContents
> old_web_contents_sptr(current_web_contents
);
338 scoped_ptr
<SourcePageHandleWebContents
> source_page_handle(
339 new SourcePageHandleWebContents(old_web_contents_sptr
.Pass()));
341 TestDistillerPageWebContents
distiller_page(
342 shell()->web_contents()->GetBrowserContext(),
343 shell()->web_contents()->GetContainerBounds().size(),
344 source_page_handle
.Pass(),
345 expect_new_web_contents
);
346 distiller_page_
= &distiller_page
;
// The distilled page is always kSimpleArticlePath, whatever |url| was loaded
// above.
348 base::RunLoop run_loop
;
349 DistillPage(run_loop
.QuitClosure(), kSimpleArticlePath
);
352 // Sanity check of distillation process.
353 EXPECT_EQ(expect_new_web_contents
, distiller_page
.new_web_contents_created());
354 EXPECT_EQ("Test Page Title", page_info_
.get()->title
);
// Distills /markup_article.html and checks the DistilledPageInfo::MarkupInfo
// that comes back with it: page-level fields, the nested article fields, and
// the two expected images.
357 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest
, MarkupInfo
) {
358 DistillerPageWebContents
distiller_page(
359 shell()->web_contents()->GetBrowserContext(),
360 shell()->web_contents()->GetContainerBounds().size(),
361 scoped_ptr
<SourcePageHandleWebContents
>());
362 distiller_page_
= &distiller_page
;
364 base::RunLoop run_loop
;
365 DistillPage(run_loop
.QuitClosure(), "/markup_article.html");
// The distilled content itself and the top-level title.
368 EXPECT_THAT(page_info_
.get()->html
, HasSubstr("Lorem ipsum"));
369 EXPECT_EQ("Marked-up Markup Test Page Title", page_info_
.get()->title
);
// Page-level markup fields.
371 const DistilledPageInfo::MarkupInfo
& markup_info
= page_info_
->markup_info
;
372 EXPECT_EQ("Marked-up Markup Test Page Title", markup_info
.title
);
373 EXPECT_EQ("Article", markup_info
.type
);
374 EXPECT_EQ("http://test/markup.html", markup_info
.url
);
375 EXPECT_EQ("This page tests Markup Info.", markup_info
.description
);
376 EXPECT_EQ("Whoever Published", markup_info
.publisher
);
377 EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", markup_info
.copyright
);
378 EXPECT_EQ("Whoever Authored", markup_info
.author
);
// Article-level markup fields.
380 const DistilledPageInfo::MarkupArticle
& markup_article
= markup_info
.article
;
381 EXPECT_EQ("Whatever Section", markup_article
.section
);
382 EXPECT_EQ("July 23, 2014", markup_article
.published_time
);
383 EXPECT_EQ("2014-07-23T23:59", markup_article
.modified_time
);
384 EXPECT_EQ("", markup_article
.expiration_time
);
// ASSERT (not EXPECT) on the size so the indexing below cannot go out of
// range.
385 ASSERT_EQ(1U, markup_article
.authors
.size());
386 EXPECT_EQ("Whoever Authored", markup_article
.authors
[0]);
// Exactly two images are expected; ASSERT guards the indexing below.
388 ASSERT_EQ(2U, markup_info
.images
.size());
// First image.
390 const DistilledPageInfo::MarkupImage
& markup_image1
= markup_info
.images
[0];
391 EXPECT_EQ("http://test/markup1.jpeg", markup_image1
.url
);
392 EXPECT_EQ("https://test/markup1.jpeg", markup_image1
.secure_url
);
393 EXPECT_EQ("jpeg", markup_image1
.type
);
394 EXPECT_EQ("", markup_image1
.caption
);
395 EXPECT_EQ(600, markup_image1
.width
);
396 EXPECT_EQ(400, markup_image1
.height
);
// Second image.
398 const DistilledPageInfo::MarkupImage
& markup_image2
= markup_info
.images
[1];
399 EXPECT_EQ("http://test/markup2.gif", markup_image2
.url
);
400 EXPECT_EQ("https://test/markup2.gif", markup_image2
.secure_url
);
401 EXPECT_EQ("gif", markup_image2
.type
);
402 EXPECT_EQ("", markup_image2
.caption
);
403 EXPECT_EQ(1000, markup_image2
.width
);
404 EXPECT_EQ(600, markup_image2
.height
);
407 } // namespace dom_distiller