[GCM] Investigatory CHECKs for crash in parsing stream
[chromium-blink-merge.git] / components / dom_distiller / content / distiller_page_web_contents_browsertest.cc
blob432c958bd61a6d35f758a08a724e2a30ac7fc6e8
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/memory/weak_ptr.h"
6 #include "base/path_service.h"
7 #include "base/run_loop.h"
8 #include "base/values.h"
9 #include "components/dom_distiller/content/distiller_page_web_contents.h"
10 #include "components/dom_distiller/content/web_contents_main_frame_observer.h"
11 #include "components/dom_distiller/core/distiller_page.h"
12 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
13 #include "components/dom_distiller/core/proto/distilled_page.pb.h"
14 #include "components/dom_distiller/core/viewer.h"
15 #include "content/public/browser/browser_context.h"
16 #include "content/public/browser/navigation_controller.h"
17 #include "content/public/browser/render_frame_host.h"
18 #include "content/public/browser/web_contents_observer.h"
19 #include "content/public/test/content_browser_test.h"
20 #include "content/shell/browser/shell.h"
21 #include "grit/components_strings.h"
22 #include "net/test/embedded_test_server/embedded_test_server.h"
23 #include "testing/gmock/include/gmock/gmock.h"
24 #include "third_party/dom_distiller_js/dom_distiller.pb.h"
25 #include "ui/base/l10n/l10n_util.h"
26 #include "ui/base/resource/resource_bundle.h"
28 using content::ContentBrowserTest;
29 using testing::ContainsRegex;
30 using testing::HasSubstr;
31 using testing::Not;
33 namespace dom_distiller {
// Paths of the test pages requested from the embedded test server. Declared as
// const arrays (rather than mutable pointers to const data) so the constants
// themselves cannot be reassigned.
const char kSimpleArticlePath[] = "/simple_article.html";
const char kVideoArticlePath[] = "/video_article.html";
38 class DistillerPageWebContentsTest : public ContentBrowserTest {
39 public:
40 // ContentBrowserTest:
41 virtual void SetUpOnMainThread() OVERRIDE {
42 AddComponentsResources();
43 SetUpTestServer();
44 ContentBrowserTest::SetUpOnMainThread();
47 void DistillPage(const base::Closure& quit_closure, const std::string& url) {
48 quit_closure_ = quit_closure;
49 distiller_page_->DistillPage(
50 embedded_test_server()->GetURL(url),
51 dom_distiller::proto::DomDistillerOptions(),
52 base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished,
53 this));
56 void OnPageDistillationFinished(
57 scoped_ptr<proto::DomDistillerResult> distiller_result,
58 bool distillation_successful) {
59 distiller_result_ = distiller_result.Pass();
60 quit_closure_.Run();
63 private:
64 void AddComponentsResources() {
65 base::FilePath pak_file;
66 base::FilePath pak_dir;
67 PathService::Get(base::DIR_MODULE, &pak_dir);
68 pak_file = pak_dir.Append(FILE_PATH_LITERAL("components_resources.pak"));
69 ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
70 pak_file, ui::SCALE_FACTOR_NONE);
73 void SetUpTestServer() {
74 base::FilePath path;
75 PathService::Get(base::DIR_SOURCE_ROOT, &path);
76 path = path.AppendASCII("components/test/data/dom_distiller");
77 embedded_test_server()->ServeFilesFromDirectory(path);
78 ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
81 protected:
82 void RunUseCurrentWebContentsTest(const std::string& url,
83 bool expect_new_web_contents,
84 bool setup_main_frame_observer,
85 bool wait_for_document_loaded);
87 DistillerPageWebContents* distiller_page_;
88 base::Closure quit_closure_;
89 scoped_ptr<proto::DomDistillerResult> distiller_result_;
92 // Use this class to be able to leak the WebContents, which is needed for when
93 // the current WebContents is used for distillation.
94 class TestDistillerPageWebContents : public DistillerPageWebContents {
95 public:
96 TestDistillerPageWebContents(
97 content::BrowserContext* browser_context,
98 const gfx::Size& render_view_size,
99 scoped_ptr<SourcePageHandleWebContents> optional_web_contents_handle,
100 bool expect_new_web_contents)
101 : DistillerPageWebContents(browser_context, render_view_size,
102 optional_web_contents_handle.Pass()),
103 expect_new_web_contents_(expect_new_web_contents),
104 new_web_contents_created_(false) {}
106 virtual void CreateNewWebContents(const GURL& url) OVERRIDE {
107 ASSERT_EQ(true, expect_new_web_contents_);
108 new_web_contents_created_ = true;
109 // DistillerPageWebContents::CreateNewWebContents resets the scoped_ptr to
110 // the WebContents, so intentionally leak WebContents here, since it is
111 // owned by the shell.
112 content::WebContents* web_contents = web_contents_.release();
113 web_contents->GetLastCommittedURL();
114 DistillerPageWebContents::CreateNewWebContents(url);
117 virtual ~TestDistillerPageWebContents() {
118 if (!expect_new_web_contents_) {
119 // Intentionally leaking WebContents, since it is owned by the shell.
120 content::WebContents* web_contents = web_contents_.release();
121 web_contents->GetLastCommittedURL();
125 bool new_web_contents_created() { return new_web_contents_created_; }
127 private:
128 bool expect_new_web_contents_;
129 bool new_web_contents_created_;
132 // Helper class to know how far in the loading process the current WebContents
133 // has come. It will call the callback either after
134 // DidCommitProvisionalLoadForFrame or DocumentLoadedInFrame is called for the
135 // main frame, based on the value of |wait_for_document_loaded|.
136 class WebContentsMainFrameHelper : public content::WebContentsObserver {
137 public:
138 WebContentsMainFrameHelper(content::WebContents* web_contents,
139 const base::Closure& callback,
140 bool wait_for_document_loaded)
141 : WebContentsObserver(web_contents),
142 callback_(callback),
143 wait_for_document_loaded_(wait_for_document_loaded) {}
145 virtual void DidCommitProvisionalLoadForFrame(
146 content::RenderFrameHost* render_frame_host,
147 const GURL& url,
148 ui::PageTransition transition_type) OVERRIDE {
149 if (wait_for_document_loaded_)
150 return;
151 if (!render_frame_host->GetParent())
152 callback_.Run();
155 virtual void DocumentLoadedInFrame(
156 content::RenderFrameHost* render_frame_host) OVERRIDE {
157 if (wait_for_document_loaded_) {
158 if (!render_frame_host->GetParent())
159 callback_.Run();
163 private:
164 base::Closure callback_;
165 bool wait_for_document_loaded_;
168 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, BasicDistillationWorks) {
169 DistillerPageWebContents distiller_page(
170 shell()->web_contents()->GetBrowserContext(),
171 shell()->web_contents()->GetContainerBounds().size(),
172 scoped_ptr<SourcePageHandleWebContents>());
173 distiller_page_ = &distiller_page;
175 base::RunLoop run_loop;
176 DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
177 run_loop.Run();
179 EXPECT_EQ("Test Page Title", distiller_result_->title());
180 EXPECT_THAT(distiller_result_->distilled_content().html(),
181 HasSubstr("Lorem ipsum"));
182 EXPECT_THAT(distiller_result_->distilled_content().html(),
183 Not(HasSubstr("questionable content")));
184 EXPECT_EQ("", distiller_result_->pagination_info().next_page());
185 EXPECT_EQ("", distiller_result_->pagination_info().prev_page());
188 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeLinks) {
189 DistillerPageWebContents distiller_page(
190 shell()->web_contents()->GetBrowserContext(),
191 shell()->web_contents()->GetContainerBounds().size(),
192 scoped_ptr<SourcePageHandleWebContents>());
193 distiller_page_ = &distiller_page;
195 base::RunLoop run_loop;
196 DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
197 run_loop.Run();
199 // A relative link should've been updated.
200 EXPECT_THAT(distiller_result_->distilled_content().html(),
201 ContainsRegex("href=\"http://127.0.0.1:.*/relativelink.html\""));
202 EXPECT_THAT(distiller_result_->distilled_content().html(),
203 HasSubstr("href=\"http://www.google.com/absolutelink.html\""));
206 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeImages) {
207 DistillerPageWebContents distiller_page(
208 shell()->web_contents()->GetBrowserContext(),
209 shell()->web_contents()->GetContainerBounds().size(),
210 scoped_ptr<SourcePageHandleWebContents>());
211 distiller_page_ = &distiller_page;
213 base::RunLoop run_loop;
214 DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
215 run_loop.Run();
217 // A relative link should've been updated.
218 EXPECT_THAT(distiller_result_->distilled_content().html(),
219 ContainsRegex("src=\"http://127.0.0.1:.*/relativeimage.png\""));
220 EXPECT_THAT(distiller_result_->distilled_content().html(),
221 HasSubstr("src=\"http://www.google.com/absoluteimage.png\""));
225 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeVideos) {
226 DistillerPageWebContents distiller_page(
227 shell()->web_contents()->GetBrowserContext(),
228 shell()->web_contents()->GetContainerBounds().size(),
229 scoped_ptr<SourcePageHandleWebContents>());
230 distiller_page_ = &distiller_page;
232 base::RunLoop run_loop;
233 DistillPage(run_loop.QuitClosure(), kVideoArticlePath);
234 run_loop.Run();
236 // A relative source/track should've been updated.
237 EXPECT_THAT(distiller_result_->distilled_content().html(),
238 ContainsRegex("src=\"http://127.0.0.1:.*/relative_video.mp4\""));
239 EXPECT_THAT(
240 distiller_result_->distilled_content().html(),
241 ContainsRegex("src=\"http://127.0.0.1:.*/relative_track_en.vtt\""));
242 EXPECT_THAT(distiller_result_->distilled_content().html(),
243 HasSubstr("src=\"http://www.google.com/absolute_video.ogg\""));
244 EXPECT_THAT(distiller_result_->distilled_content().html(),
245 HasSubstr("src=\"http://www.google.com/absolute_track_fr.vtt\""));
248 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, VisibilityDetection) {
249 DistillerPageWebContents distiller_page(
250 shell()->web_contents()->GetBrowserContext(),
251 shell()->web_contents()->GetContainerBounds().size(),
252 scoped_ptr<SourcePageHandleWebContents>());
253 distiller_page_ = &distiller_page;
255 // visble_style.html and invisible_style.html only differ by the visibility
256 // internal stylesheet.
259 base::RunLoop run_loop;
260 DistillPage(run_loop.QuitClosure(), "/visible_style.html");
261 run_loop.Run();
262 EXPECT_THAT(distiller_result_->distilled_content().html(),
263 HasSubstr("Lorem ipsum"));
267 base::RunLoop run_loop;
268 DistillPage(run_loop.QuitClosure(), "/invisible_style.html");
269 run_loop.Run();
270 EXPECT_THAT(distiller_result_->distilled_content().html(),
271 Not(HasSubstr("Lorem ipsum")));
275 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
276 UsingCurrentWebContentsWrongUrl) {
277 std::string url("/bogus");
278 bool expect_new_web_contents = true;
279 bool setup_main_frame_observer = true;
280 bool wait_for_document_loaded = true;
281 RunUseCurrentWebContentsTest(url,
282 expect_new_web_contents,
283 setup_main_frame_observer,
284 wait_for_document_loaded);
287 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
288 UsingCurrentWebContentsNoMainFrameObserver) {
289 std::string url(kSimpleArticlePath);
290 bool expect_new_web_contents = true;
291 bool setup_main_frame_observer = false;
292 bool wait_for_document_loaded = true;
293 RunUseCurrentWebContentsTest(url,
294 expect_new_web_contents,
295 setup_main_frame_observer,
296 wait_for_document_loaded);
299 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
300 UsingCurrentWebContentsNotFinishedLoadingYet) {
301 std::string url(kSimpleArticlePath);
302 bool expect_new_web_contents = false;
303 bool setup_main_frame_observer = true;
304 bool wait_for_document_loaded = false;
305 RunUseCurrentWebContentsTest(url,
306 expect_new_web_contents,
307 setup_main_frame_observer,
308 wait_for_document_loaded);
311 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
312 UsingCurrentWebContentsReadyForDistillation) {
313 std::string url(kSimpleArticlePath);
314 bool expect_new_web_contents = false;
315 bool setup_main_frame_observer = true;
316 bool wait_for_document_loaded = true;
317 RunUseCurrentWebContentsTest(url,
318 expect_new_web_contents,
319 setup_main_frame_observer,
320 wait_for_document_loaded);
323 void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest(
324 const std::string& url,
325 bool expect_new_web_contents,
326 bool setup_main_frame_observer,
327 bool wait_for_document_loaded) {
328 content::WebContents* current_web_contents = shell()->web_contents();
329 if (setup_main_frame_observer) {
330 dom_distiller::WebContentsMainFrameObserver::CreateForWebContents(
331 current_web_contents);
333 base::RunLoop url_loaded_runner;
334 WebContentsMainFrameHelper main_frame_loaded(current_web_contents,
335 url_loaded_runner.QuitClosure(),
336 wait_for_document_loaded);
337 current_web_contents->GetController().LoadURL(
338 embedded_test_server()->GetURL(url),
339 content::Referrer(),
340 ui::PAGE_TRANSITION_TYPED,
341 std::string());
342 url_loaded_runner.Run();
344 scoped_ptr<content::WebContents> old_web_contents_sptr(current_web_contents);
345 scoped_ptr<SourcePageHandleWebContents> source_page_handle(
346 new SourcePageHandleWebContents(old_web_contents_sptr.Pass()));
348 TestDistillerPageWebContents distiller_page(
349 shell()->web_contents()->GetBrowserContext(),
350 shell()->web_contents()->GetContainerBounds().size(),
351 source_page_handle.Pass(),
352 expect_new_web_contents);
353 distiller_page_ = &distiller_page;
355 base::RunLoop run_loop;
356 DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
357 run_loop.Run();
359 // Sanity check of distillation process.
360 EXPECT_EQ(expect_new_web_contents, distiller_page.new_web_contents_created());
361 EXPECT_EQ("Test Page Title", distiller_result_->title());
364 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, MarkupInfo) {
365 DistillerPageWebContents distiller_page(
366 shell()->web_contents()->GetBrowserContext(),
367 shell()->web_contents()->GetContainerBounds().size(),
368 scoped_ptr<SourcePageHandleWebContents>());
369 distiller_page_ = &distiller_page;
371 base::RunLoop run_loop;
372 DistillPage(run_loop.QuitClosure(), "/markup_article.html");
373 run_loop.Run();
375 EXPECT_THAT(distiller_result_->distilled_content().html(),
376 HasSubstr("Lorem ipsum"));
377 EXPECT_EQ("Marked-up Markup Test Page Title", distiller_result_->title());
379 const proto::MarkupInfo markup_info = distiller_result_->markup_info();
380 EXPECT_EQ("Marked-up Markup Test Page Title", markup_info.title());
381 EXPECT_EQ("Article", markup_info.type());
382 EXPECT_EQ("http://test/markup.html", markup_info.url());
383 EXPECT_EQ("This page tests Markup Info.", markup_info.description());
384 EXPECT_EQ("Whoever Published", markup_info.publisher());
385 EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", markup_info.copyright());
386 EXPECT_EQ("Whoever Authored", markup_info.author());
388 const proto::MarkupArticle markup_article = markup_info.article();
389 EXPECT_EQ("Whatever Section", markup_article.section());
390 EXPECT_EQ("July 23, 2014", markup_article.published_time());
391 EXPECT_EQ("2014-07-23T23:59", markup_article.modified_time());
392 EXPECT_EQ("", markup_article.expiration_time());
393 ASSERT_EQ(1, markup_article.authors_size());
394 EXPECT_EQ("Whoever Authored", markup_article.authors(0));
396 ASSERT_EQ(2, markup_info.images_size());
398 const proto::MarkupImage markup_image1 = markup_info.images(0);
399 EXPECT_EQ("http://test/markup1.jpeg", markup_image1.url());
400 EXPECT_EQ("https://test/markup1.jpeg", markup_image1.secure_url());
401 EXPECT_EQ("jpeg", markup_image1.type());
402 EXPECT_EQ("", markup_image1.caption());
403 EXPECT_EQ(600, markup_image1.width());
404 EXPECT_EQ(400, markup_image1.height());
406 const proto::MarkupImage markup_image2 = markup_info.images(1);
407 EXPECT_EQ("http://test/markup2.gif", markup_image2.url());
408 EXPECT_EQ("https://test/markup2.gif", markup_image2.secure_url());
409 EXPECT_EQ("gif", markup_image2.type());
410 EXPECT_EQ("", markup_image2.caption());
411 EXPECT_EQ(1000, markup_image2.width());
412 EXPECT_EQ(600, markup_image2.height());
415 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
416 TestTitleAndContentAreNeverEmpty) {
417 const std::string some_title = "some title";
418 const std::string some_content = "some content";
419 const std::string no_title =
420 l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_TITLE);
421 const std::string no_content =
422 l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_CONTENT);
424 { // Test non-empty title and content for article.
425 scoped_ptr<DistilledArticleProto> article_proto(
426 new DistilledArticleProto());
427 article_proto->set_title(some_title);
428 (*(article_proto->add_pages())).set_html(some_content);
429 std::string html = viewer::GetUnsafeArticleHtml(article_proto.get(),
430 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
431 EXPECT_THAT(html, HasSubstr(some_title));
432 EXPECT_THAT(html, HasSubstr(some_content));
433 EXPECT_THAT(html, Not(HasSubstr(no_title)));
434 EXPECT_THAT(html, Not(HasSubstr(no_content)));
437 { // Test empty title and content for article.
438 scoped_ptr<DistilledArticleProto> article_proto(
439 new DistilledArticleProto());
440 article_proto->set_title("");
441 (*(article_proto->add_pages())).set_html("");
442 std::string html = viewer::GetUnsafeArticleHtml(article_proto.get(),
443 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
444 EXPECT_THAT(html, HasSubstr(no_title));
445 EXPECT_THAT(html, HasSubstr(no_content));
446 EXPECT_THAT(html, Not(HasSubstr(some_title)));
447 EXPECT_THAT(html, Not(HasSubstr(some_content)));
450 { // Test missing title and non-empty content for article.
451 scoped_ptr<DistilledArticleProto> article_proto(
452 new DistilledArticleProto());
453 (*(article_proto->add_pages())).set_html(some_content);
454 std::string html = viewer::GetUnsafeArticleHtml(article_proto.get(),
455 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
456 EXPECT_THAT(html, HasSubstr(no_title));
457 EXPECT_THAT(html, HasSubstr(no_content));
458 EXPECT_THAT(html, Not(HasSubstr(some_title)));
459 EXPECT_THAT(html, Not(HasSubstr(some_content)));
462 { // Test non-empty title and missing content for article.
463 scoped_ptr<DistilledArticleProto> article_proto(
464 new DistilledArticleProto());
465 article_proto->set_title(some_title);
466 std::string html = viewer::GetUnsafeArticleHtml(article_proto.get(),
467 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
468 EXPECT_THAT(html, HasSubstr(no_title));
469 EXPECT_THAT(html, HasSubstr(no_content));
470 EXPECT_THAT(html, Not(HasSubstr(some_title)));
471 EXPECT_THAT(html, Not(HasSubstr(some_content)));
474 { // Test non-empty title and content for page.
475 scoped_ptr<DistilledPageProto> page_proto(new DistilledPageProto());
476 page_proto->set_title(some_title);
477 page_proto->set_html(some_content);
478 std::string html = viewer::GetUnsafePartialArticleHtml(page_proto.get(),
479 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
480 EXPECT_THAT(html, HasSubstr(some_title));
481 EXPECT_THAT(html, HasSubstr(some_content));
482 EXPECT_THAT(html, Not(HasSubstr(no_title)));
483 EXPECT_THAT(html, Not(HasSubstr(no_content)));
486 { // Test empty title and content for page.
487 scoped_ptr<DistilledPageProto> page_proto(new DistilledPageProto());
488 page_proto->set_title("");
489 page_proto->set_html("");
490 std::string html = viewer::GetUnsafePartialArticleHtml(page_proto.get(),
491 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
492 EXPECT_THAT(html, HasSubstr(no_title));
493 EXPECT_THAT(html, HasSubstr(no_content));
494 EXPECT_THAT(html, Not(HasSubstr(some_title)));
495 EXPECT_THAT(html, Not(HasSubstr(some_content)));
498 { // Test missing title and non-empty content for page.
499 scoped_ptr<DistilledPageProto> page_proto(new DistilledPageProto());
500 page_proto->set_html(some_content);
501 std::string html = viewer::GetUnsafePartialArticleHtml(page_proto.get(),
502 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
503 EXPECT_THAT(html, HasSubstr(no_title));
504 EXPECT_THAT(html, HasSubstr(some_content));
505 EXPECT_THAT(html, Not(HasSubstr(some_title)));
506 EXPECT_THAT(html, Not(HasSubstr(no_content)));
509 { // Test non-empty title and missing content for page.
510 scoped_ptr<DistilledPageProto> page_proto(new DistilledPageProto());
511 page_proto->set_title(some_title);
512 std::string html = viewer::GetUnsafePartialArticleHtml(page_proto.get(),
513 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
514 EXPECT_THAT(html, HasSubstr(some_title));
515 EXPECT_THAT(html, HasSubstr(no_content));
516 EXPECT_THAT(html, Not(HasSubstr(no_title)));
517 EXPECT_THAT(html, Not(HasSubstr(some_content)));
521 } // namespace dom_distiller