1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/dom_distiller/core/distiller_page.h"
8 #include "base/json/json_writer.h"
9 #include "base/logging.h"
10 #include "base/message_loop/message_loop.h"
11 #include "base/metrics/histogram.h"
12 #include "base/strings/string_util.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "base/time/time.h"
15 #include "grit/component_resources.h"
16 #include "third_party/dom_distiller_js/dom_distiller.pb.h"
17 #include "third_party/dom_distiller_js/dom_distiller_json_converter.h"
18 #include "ui/base/resource/resource_bundle.h"
21 namespace dom_distiller
{
// Marker text that GetDistillerScriptWithOptions() searches for in the
// distiller script and overwrites with the JSON-serialized options.
// Declared as a const array (not a reseatable pointer) so the constant cannot
// be redirected at runtime; it still decays to const char* for strlen()/find().
const char kOptionsPlaceholder[] = "$$OPTIONS";
// Builds the distiller script text with |options| embedded in it.
//
// The visible steps are: fetch the IDR_DISTILLER_JS raw resource from the
// shared ResourceBundle, convert |options| to a base::Value and serialize it
// to JSON, then substitute that JSON for kOptionsPlaceholder in the script.
//
// NOTE(review): this view of the function is incomplete -- the end of the
// resource-loading statement, the body of the JSON-failure branch, and the
// return statement / closing brace are not visible here. Confirm against the
// full file before relying on these comments for those parts.
27 std::string
GetDistillerScriptWithOptions(
28 const dom_distiller::proto::DomDistillerOptions
& options
) {
// Start from the raw script resource.
29 std::string script
= ResourceBundle::GetSharedInstance()
30 .GetRawDataResource(IDR_DISTILLER_JS
)
// Convert the options proto to a base::Value owned by |options_value|.
36 scoped_ptr
<base::Value
> options_value(
37 dom_distiller::proto::json::DomDistillerOptions::WriteToValue(options
));
38 std::string options_json
;
// Serialize the value to JSON; the branch below is entered when Write()
// reports failure (its body is not visible in this view).
39 if (!base::JSONWriter::Write(options_value
.get(), &options_json
)) {
// Locate the placeholder in the script.
42 size_t options_offset
= script
.find(kOptionsPlaceholder
);
// Debug check: the placeholder must be present...
43 DCHECK_NE(std::string::npos
, options_offset
);
// ...and a second search starting just past the first hit must find nothing,
// i.e. the placeholder is expected to occur exactly once.
44 DCHECK_EQ(std::string::npos
,
45 script
.find(kOptionsPlaceholder
, options_offset
+ 1));
// Overwrite the placeholder (strlen(kOptionsPlaceholder) characters starting
// at |options_offset|) with the serialized options.
47 script
.replace(options_offset
, strlen(kOptionsPlaceholder
), options_json
);
// Out-of-line default constructor; the body is empty.
53 DistilledPageInfo::DistilledPageInfo() {}
// Out-of-line destructor; the body is empty.
55 DistilledPageInfo::~DistilledPageInfo() {}
// Out-of-line default constructor for the nested MarkupArticle type; the body
// is empty.
57 DistilledPageInfo::MarkupArticle::MarkupArticle() {}
// Out-of-line destructor for the nested MarkupArticle type; the body is empty.
59 DistilledPageInfo::MarkupArticle::~MarkupArticle() {}
// Out-of-line default constructor for the nested MarkupImage type; the body is
// empty.
61 DistilledPageInfo::MarkupImage::MarkupImage() {}
// Out-of-line destructor for the nested MarkupImage type; the body is empty.
63 DistilledPageInfo::MarkupImage::~MarkupImage() {}
// Out-of-line default constructor for the nested MarkupInfo type; the body is
// empty.
65 DistilledPageInfo::MarkupInfo::MarkupInfo() {}
// Out-of-line destructor for the nested MarkupInfo type; the body is empty.
67 DistilledPageInfo::MarkupInfo::~MarkupInfo() {}
// Out-of-line destructor for DistillerPageFactory; the body is empty.
69 DistillerPageFactory::~DistillerPageFactory() {}
// Constructs a DistillerPage with |ready_| initialized to true; the body is
// otherwise empty.
71 DistillerPage::DistillerPage() : ready_(true) {}
// Out-of-line destructor for DistillerPage; the body is empty.
73 DistillerPage::~DistillerPage() {}
// Starts distillation: remembers |callback| in |distiller_page_callback_| and
// invokes DistillPageImpl() with |gurl| and the script produced by
// GetDistillerScriptWithOptions(options).
//
// NOTE(review): |gurl| is used below but its declaration is not visible in
// this view, and the closing brace is also missing -- parts of the parameter
// list and body appear to be absent here. Confirm against the full file.
// NOTE(review): |options| is declared as a const value (no '&'), so the proto
// is copied on each call; whether a const reference was intended must be
// checked against the header declaration.
75 void DistillerPage::DistillPage(
77 const dom_distiller::proto::DomDistillerOptions options
,
78 const DistillerPageCallback
& callback
) {
80 // It is only possible to distill one page at a time. |ready_| is reset when
81 // the callback to OnDistillationDone happens.
// Keep the callback so it can be run once the result arrives.
83 distiller_page_callback_
= callback
;
// Hand the URL and the options-specialized script to the implementation hook.
84 DistillPageImpl(gurl
, GetDistillerScriptWithOptions(options
));
// Handles the result of a distillation.
//
// Visible behavior: converts |value| into a DomDistillerResult proto, copies
// its fields (title, html, pagination URLs, valid image URLs, markup info,
// article info, markup images) into a newly allocated DistilledPageInfo, looks
// at the optional timing info, and posts a task to the current message loop
// carrying |distiller_page_callback_|, the page info and |found_content|.
//
// |page_url| is not referenced by any line visible in this view.
// |value| is dereferenced without a visible null check -- assumes callers pass
// a non-null pointer; TODO confirm.
//
// NOTE(review): this view of the function is incomplete. Any branching on
// |found_content|, the calls that consume the timing name/value pairs, the
// start of the posted task expression, and the closing braces are not visible.
// The comments below describe only what is shown.
87 void DistillerPage::OnDistillationDone(const GURL
& page_url
,
88 const base::Value
* value
) {
// Result object that is later handed to the callback.
92 scoped_ptr
<DistilledPageInfo
> page_info(new DistilledPageInfo());
// Content counts as found unless |value| is of the null value type.
93 bool found_content
= !value
->IsType(base::Value::TYPE_NULL
);
// Convert the value into the result proto.
95 dom_distiller::proto::DomDistillerResult distiller_result
=
96 dom_distiller::proto::json::DomDistillerResult::ReadFromValue(value
);
// Copy the top-level fields: title, distilled HTML, and pagination links.
98 page_info
->title
= distiller_result
.title();
99 page_info
->html
= distiller_result
.distilled_content().html();
100 page_info
->next_page_url
= distiller_result
.pagination_info().next_page();
101 page_info
->prev_page_url
= distiller_result
.pagination_info().prev_page();
// Copy image URLs, keeping only those that GURL reports as valid.
102 for (int i
= 0; i
< distiller_result
.image_urls_size(); ++i
) {
103 const std::string image_url
= distiller_result
.image_urls(i
);
104 if (GURL(image_url
).is_valid()) {
105 page_info
->image_urls
.push_back(image_url
);
// Copy the markup info fields from the proto into the page info's markup_info
// member (accessed by reference, so writes land in |page_info|).
108 const dom_distiller::proto::MarkupInfo
& src_markup_info
=
109 distiller_result
.markup_info();
110 DistilledPageInfo::MarkupInfo
& dst_markup_info
= page_info
->markup_info
;
111 dst_markup_info
.title
= src_markup_info
.title();
112 dst_markup_info
.type
= src_markup_info
.type();
113 dst_markup_info
.url
= src_markup_info
.url();
114 dst_markup_info
.description
= src_markup_info
.description();
115 dst_markup_info
.publisher
= src_markup_info
.publisher();
116 dst_markup_info
.copyright
= src_markup_info
.copyright();
117 dst_markup_info
.author
= src_markup_info
.author();
// Copy the article sub-message: timestamps, section, and the author list.
119 const dom_distiller::proto::MarkupArticle
& src_article
=
120 src_markup_info
.article();
121 DistilledPageInfo::MarkupArticle
& dst_article
= dst_markup_info
.article
;
122 dst_article
.published_time
= src_article
.published_time();
123 dst_article
.modified_time
= src_article
.modified_time();
124 dst_article
.expiration_time
= src_article
.expiration_time();
125 dst_article
.section
= src_article
.section();
126 for (int i
= 0; i
< src_article
.authors_size(); ++i
) {
127 dst_article
.authors
.push_back(src_article
.authors(i
));
// Copy each markup image into a fresh MarkupImage and append it.
130 for (int i
= 0; i
< src_markup_info
.images_size(); ++i
) {
131 const dom_distiller::proto::MarkupImage
& src_image
=
132 src_markup_info
.images(i
);
133 DistilledPageInfo::MarkupImage dst_image
;
134 dst_image
.url
= src_image
.url();
135 dst_image
.secure_url
= src_image
.secure_url();
136 dst_image
.type
= src_image
.type();
137 dst_image
.caption
= src_image
.caption();
138 dst_image
.width
= src_image
.width();
139 dst_image
.height
= src_image
.height();
140 dst_markup_info
.images
.push_back(dst_image
);
// Timing info is optional; each individual timing field is also checked with
// its has_*() accessor before use. Each present value is converted with
// base::TimeDelta::FromMillisecondsD and paired with a "DomDistiller.Time.*"
// name. NOTE(review): the call that consumes each name/value pair is not
// visible here -- presumably a histogram-recording macro, given the
// base/metrics/histogram.h include; confirm against the full file.
142 if (distiller_result
.has_timing_info()) {
143 const dom_distiller::proto::TimingInfo
& timing
=
144 distiller_result
.timing_info();
145 if (timing
.has_markup_parsing_time()) {
147 "DomDistiller.Time.MarkupParsing",
148 base::TimeDelta::FromMillisecondsD(timing
.markup_parsing_time()));
150 if (timing
.has_document_construction_time()) {
152 "DomDistiller.Time.DocumentConstruction",
153 base::TimeDelta::FromMillisecondsD(
154 timing
.document_construction_time()));
156 if (timing
.has_article_processing_time()) {
158 "DomDistiller.Time.ArticleProcessing",
159 base::TimeDelta::FromMillisecondsD(
160 timing
.article_processing_time()));
162 if (timing
.has_formatting_time()) {
164 "DomDistiller.Time.Formatting",
165 base::TimeDelta::FromMillisecondsD(timing
.formatting_time()));
167 if (timing
.has_total_time()) {
169 "DomDistiller.Time.DistillationTotal",
170 base::TimeDelta::FromMillisecondsD(timing
.total_time()));
// Post a task to the current message loop rather than invoking the callback
// inline. |page_info| is handed over via base::Passed, so this function must
// not use it afterwards. The head of the task expression is not visible in
// this view.
175 base::MessageLoop::current()->PostTask(
178 distiller_page_callback_
, base::Passed(&page_info
), found_content
));
181 } // namespace dom_distiller