1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "base/command_line.h"
8 #include "base/files/scoped_temp_dir.h"
9 #include "base/message_loop/message_loop.h"
10 #include "base/path_service.h"
11 #include "base/run_loop.h"
12 #include "components/dom_distiller/content/distiller_page_web_contents.h"
13 #include "components/dom_distiller/core/distiller.h"
14 #include "components/dom_distiller/core/dom_distiller_database.h"
15 #include "components/dom_distiller/core/dom_distiller_service.h"
16 #include "components/dom_distiller/core/dom_distiller_store.h"
17 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
18 #include "components/dom_distiller/core/proto/distilled_page.pb.h"
19 #include "components/dom_distiller/core/task_tracker.h"
20 #include "content/public/browser/browser_context.h"
21 #include "content/public/browser/browser_thread.h"
22 #include "content/public/test/content_browser_test.h"
23 #include "content/shell/browser/shell.h"
24 #include "net/dns/mock_host_resolver.h"
25 #include "ui/base/resource/resource_bundle.h"
27 using content::ContentBrowserTest
;
29 namespace dom_distiller
{
// Command-line switch naming the url to distill.
// Declared as a const array (not a mutable `const char*`) so the name cannot
// be re-pointed at run time and has internal linkage.
const char kUrlSwitch[] = "url";
// Command-line switch indicating that DNS resolution should be disabled for
// this test.
// Declared as a const array (not a mutable `const char*`) so the name cannot
// be re-pointed at run time and has internal linkage.
const char kDisableDnsSwitch[] = "disable-dns";
// Command-line switch: when present, the distilled output is written to the
// given file instead of to stdout.
// Declared as a const array (not a mutable `const char*`) so the name cannot
// be re-pointed at run time and has internal linkage.
const char kOutputFile[] = "output-file";
// Command-line switch indicating to output a serialized protocol buffer
// instead of human-readable output.
// Declared as a const array (not a mutable `const char*`) so the name cannot
// be re-pointed at run time and has internal linkage.
const char kShouldOutputBinary[] = "output-binary";
46 scoped_ptr
<DomDistillerService
> CreateDomDistillerService(
47 content::BrowserContext
* context
,
48 const base::FilePath
& db_path
) {
49 scoped_refptr
<base::SequencedTaskRunner
> background_task_runner
=
50 content::BrowserThread::GetBlockingPool()->GetSequencedTaskRunner(
51 content::BrowserThread::GetBlockingPool()->GetSequenceToken());
53 // TODO(cjhopman): use an in-memory database instead of an on-disk one with
54 // temporary directory.
55 scoped_ptr
<DomDistillerDatabase
> db(
56 new DomDistillerDatabase(background_task_runner
));
57 scoped_ptr
<DomDistillerStore
> dom_distiller_store(new DomDistillerStore(
58 db
.PassAs
<DomDistillerDatabaseInterface
>(), db_path
));
60 scoped_ptr
<DistillerPageFactory
> distiller_page_factory(
61 new DistillerPageWebContentsFactory(context
));
62 scoped_ptr
<DistillerURLFetcherFactory
> distiller_url_fetcher_factory(
63 new DistillerURLFetcherFactory(context
->GetRequestContext()));
64 scoped_ptr
<DistillerFactory
> distiller_factory(
65 new DistillerFactoryImpl(distiller_url_fetcher_factory
.Pass()));
67 return scoped_ptr
<DomDistillerService
>(new DomDistillerService(
68 dom_distiller_store
.PassAs
<DomDistillerStoreInterface
>(),
69 distiller_factory
.Pass(),
70 distiller_page_factory
.Pass()));
73 void AddComponentsResources() {
74 base::FilePath pak_file
;
75 base::FilePath pak_dir
;
76 PathService::Get(base::DIR_MODULE
, &pak_dir
);
77 pak_file
= pak_dir
.Append(FILE_PATH_LITERAL("components_resources.pak"));
78 ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
79 pak_file
, ui::SCALE_FACTOR_NONE
);
// Renders |article_proto| into a string and, when the kOutputFile switch is
// present on the process command line, writes that string to the path given
// by the switch.
// NOTE(review): some lines of this function are not visible in this view
// (closing braces, and presumably the `else` branches that separate binary
// from human-readable output and file output from the default output) --
// confirm against the complete file before relying on these comments.
82 void LogArticle(const DistilledArticleProto
& article_proto
) {
83 std::stringstream output
;
// Binary mode: emit the protocol buffer's serialized bytes.
84 if (CommandLine::ForCurrentProcess()->HasSwitch(kShouldOutputBinary
)) {
85 output
<< article_proto
.SerializeAsString();
// Human-readable mode: the title and page count, then one section per page.
87 output
<< "Article Title: " << article_proto
.title() << std::endl
;
88 output
<< "# of pages: " << article_proto
.pages_size() << std::endl
;
89 for (int i
= 0; i
< article_proto
.pages_size(); ++i
) {
90 const DistilledPageProto
& page
= article_proto
.pages(i
);
91 output
<< "Page " << i
<< std::endl
;
92 output
<< "URL: " << page
.url() << std::endl
;
93 output
<< "Content: " << page
.html() << std::endl
;
// Snapshot everything accumulated in the stream.
97 std::string data
= output
.str();
98 if (CommandLine::ForCurrentProcess()->HasSwitch(kOutputFile
)) {
99 base::FilePath filename
=
100 CommandLine::ForCurrentProcess()->GetSwitchValuePath(kOutputFile
);
// NOTE(review): the result of base::WriteFile is not checked here, so a
// failed write would go unnoticed -- confirm whether that is intended.
101 base::WriteFile(filename
, data
.c_str(), data
.size());
// ViewRequestDelegate that asks a DomDistillerService to view a single URL
// and remembers the article pointer handed to OnArticleReady().
// NOTE(review): several lines of this class are not visible in this view
// (access specifiers, closing braces, parts of some statements and
// declarations) -- confirm details against the complete file.
109 class ContentExtractionRequest
: public ViewRequestDelegate
{
// Stores |finished_callback| and asks |service| to view |url_| with this
// object as the delegate, using the service's default distiller page.
111 void Start(DomDistillerService
* service
, base::Closure finished_callback
) {
112 finished_callback_
= finished_callback
;
// NOTE(review): presumably the value returned by ViewUrl() is kept in
// |viewer_handle_|; the assignment is not visible here.
114 service
->ViewUrl(this, service
->CreateDefaultDistillerPage(), url_
);
// Returns a copy of the article that |article_proto_| points to.
117 DistilledArticleProto
GetArticleCopy() {
118 return *article_proto_
;
// Builds a request from the kUrlSwitch value on |command_line|.
// Reports a failure through ADD_FAILURE() and returns an empty scoped_ptr
// when the resulting URL is not valid; otherwise returns a new request.
121 static scoped_ptr
<ContentExtractionRequest
> CreateForCommandLine(
122 const CommandLine
& command_line
) {
124 if (command_line
.HasSwitch(kUrlSwitch
)) {
125 std::string url_string
= command_line
.GetSwitchValueASCII(kUrlSwitch
);
126 url
= GURL(url_string
);
128 if (!url
.is_valid()) {
129 ADD_FAILURE() << "No valid url provided";
130 return scoped_ptr
<ContentExtractionRequest
>();
132 return scoped_ptr
<ContentExtractionRequest
>(
133 new ContentExtractionRequest(url
));
// Creates a request for |url|.
// NOTE(review): the visible initializer list sets only |url_|;
// |article_proto_| is not initialized here although GetArticleCopy()
// dereferences it -- confirm it cannot be read before OnArticleReady().
137 ContentExtractionRequest(const GURL
& url
) : url_(url
) {}
// ViewRequestDelegate callback for article updates; its body is not visible
// in this view.
139 virtual void OnArticleUpdated(ArticleDistillationUpdate article_update
)
// ViewRequestDelegate callback: remembers |article_proto| and posts a task
// to the current message loop.
// NOTE(review): the PostTask arguments are not visible here; presumably
// |finished_callback_| is what gets posted -- confirm.
142 virtual void OnArticleReady(const DistilledArticleProto
* article_proto
)
144 article_proto_
= article_proto
;
145 base::MessageLoop::current()->PostTask(
// Article pointer most recently received by OnArticleReady().
150 const DistilledArticleProto
* article_proto_
;
// NOTE(review): presumably the handle for the in-flight view request; no
// assignment to it is visible here.
151 scoped_ptr
<ViewerHandle
> viewer_handle_
;
// Callback stored by Start().
153 base::Closure finished_callback_
;
// ContentBrowserTest fixture that owns a DomDistillerService and a
// ContentExtractionRequest, and logs the article the request produced.
// NOTE(review): several lines of this class are not visible in this view
// (access specifiers, closing braces, the headers of some methods and parts
// of some statements) -- confirm details against the complete file.
156 class ContentExtractor
: public ContentBrowserTest
{
157 // Change behavior of the default host resolver to avoid DNS lookup errors, so
158 // we can make network calls.
// Direct DNS lookups are enabled only when kDisableDnsSwitch is absent. The
// visible body also creates the temp dir held by |db_dir_| (CHECK-failing if
// that fails) and registers the components resources.
159 virtual void SetUpOnMainThread() OVERRIDE
{
160 if (!CommandLine::ForCurrentProcess()->HasSwitch(kDisableDnsSwitch
)) {
161 EnableDNSLookupForThisTest();
163 CHECK(db_dir_
.CreateUniqueTempDir());
164 AddComponentsResources();
// Drops the host resolver override when the test is torn down.
167 virtual void TearDownOnMainThread() OVERRIDE
{
168 DisableDNSLookupForThisTest();
172 // Creates the DomDistillerService and creates and starts the extraction request.
// NOTE(review): the header of this method and the remaining arguments of the
// calls below are not visible in this view.
175 content::BrowserContext
* context
=
176 shell()->web_contents()->GetBrowserContext();
177 service_
= CreateDomDistillerService(context
,
179 const CommandLine
& command_line
= *CommandLine::ForCurrentProcess();
180 request_
= ContentExtractionRequest::CreateForCommandLine(command_line
);
// Finish() is bound with base::Unretained(this), so the callback holds a
// raw, unowned pointer to this fixture.
183 base::Bind(&ContentExtractor::Finish
, base::Unretained(this)));
187 // Change behavior of the default host resolver to allow DNS lookup
188 // to proceed instead of being blocked by the test infrastructure.
189 void EnableDNSLookupForThisTest() {
190 // mock_host_resolver_override_ takes ownership of the resolver.
191 scoped_refptr
<net::RuleBasedHostResolverProc
> resolver
=
192 new net::RuleBasedHostResolverProc(host_resolver());
// "*" is the pattern passed to AllowDirectLookup.
193 resolver
->AllowDirectLookup("*");
194 mock_host_resolver_override_
.reset(
195 new net::ScopedDefaultHostResolverProc(resolver
.get()));
198 // We need to reset the DNS lookup when we finish, or the test will fail.
199 void DisableDNSLookupForThisTest() {
200 mock_host_resolver_override_
.reset();
// Logs a copy of the request's article, then posts QuitWhenIdleClosure to the
// current message loop.
// NOTE(review): the header of this method (presumably Finish(), which is
// bound above) and the lines between these two statements are not visible.
204 LogArticle(request_
->GetArticleCopy());
207 base::MessageLoop::current()->PostTask(
208 FROM_HERE
, base::MessageLoop::QuitWhenIdleClosure());
// Temp dir created in SetUpOnMainThread().
211 base::ScopedTempDir db_dir_
;
// Resolver override installed by EnableDNSLookupForThisTest() and cleared by
// DisableDNSLookupForThisTest().
212 scoped_ptr
<net::ScopedDefaultHostResolverProc
> mock_host_resolver_override_
;
// Service created through CreateDomDistillerService().
213 scoped_ptr
<DomDistillerService
> service_
;
// Request created through ContentExtractionRequest::CreateForCommandLine().
214 scoped_ptr
<ContentExtractionRequest
> request_
;
// Browser test on the ContentExtractor fixture that runs a base::RunLoop.
// NOTE(review): the line between the test header and Run() is not visible in
// this view (presumably it starts the extraction), nor is the closing brace.
217 IN_PROC_BROWSER_TEST_F(ContentExtractor
, MANUAL_ExtractUrl
) {
// Run() is called on a temporary RunLoop.
219 base::RunLoop().Run();
222 } // namespace dom_distiller