Updating trunk VERSION from 2139.0 to 2140.0
[chromium-blink-merge.git] / components / dom_distiller / core / distiller.h
blob 648949e2132cee44f637f441acf29d8014aff51b
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
8 #include <map>
9 #include <string>
11 #include "base/callback.h"
12 #include "base/containers/hash_tables.h"
13 #include "base/memory/ref_counted.h"
14 #include "base/memory/scoped_ptr.h"
15 #include "base/memory/scoped_vector.h"
16 #include "base/memory/weak_ptr.h"
17 #include "components/dom_distiller/core/article_distillation_update.h"
18 #include "components/dom_distiller/core/distiller_page.h"
19 #include "components/dom_distiller/core/distiller_url_fetcher.h"
20 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
21 #include "net/url_request/url_request_context_getter.h"
22 #include "url/gurl.h"
24 namespace dom_distiller {
// Forward declaration; the full DistillerImpl class is declared further down
// in this header.
26 class DistillerImpl;
// Abstract distillation interface. DistillPage() is pure virtual, so all
// behavior is supplied by a subclass.
// NOTE(review): this listing looks like a web-viewer extract -- the leading
// integer on each line is the viewer's line number, and the class's closing
// "};" is absent from the listing. Confirm against the upstream header.
28 class Distiller {
29 public:
// Callback type taking a scoped_ptr<DistilledArticleProto>; it is the type of
// the |finished_cb| argument of DistillPage().
30 typedef base::Callback<void(scoped_ptr<DistilledArticleProto>)>
31 DistillationFinishedCallback;
// Callback type taking a const ArticleDistillationUpdate&; it is the type of
// the |update_cb| argument of DistillPage().
32 typedef base::Callback<void(const ArticleDistillationUpdate&)>
33 DistillationUpdateCallback;
// Virtual destructor with an empty body.
35 virtual ~Distiller() {}
37 // Distills a page, and asynchronously returns the article HTML to the
38 // supplied |finished_cb| callback. |update_cb| is invoked whenever the article
39 // under distillation is updated with more data.
40 // E.g. when distilling a 2 page article, |update_cb| may be invoked each time
41 // a distilled page is added and |finished_cb| will be invoked once
42 // distillation is completed.
// |distiller_page| is taken by value as a scoped_ptr (presumably an ownership
// transfer to the distiller -- confirm against the scoped_ptr conventions).
// Both callbacks are taken by const reference.
43 virtual void DistillPage(const GURL& url,
44 scoped_ptr<DistillerPage> distiller_page,
45 const DistillationFinishedCallback& finished_cb,
46 const DistillationUpdateCallback& update_cb) = 0;
// Abstract factory interface for Distiller objects.
// NOTE(review): the class's closing "};" is absent from this listing
// (presumably dropped during extraction) -- confirm against upstream.
49 class DistillerFactory {
50 public:
// Pure virtual; returns the new Distiller wrapped in a scoped_ptr.
51 virtual scoped_ptr<Distiller> CreateDistiller() = 0;
// Virtual destructor with an empty body.
52 virtual ~DistillerFactory() {}
55 // Factory for creating a Distiller.
// Concrete DistillerFactory: overrides CreateDistiller(). The constructor,
// destructor and CreateDistiller() are only declared here; their definitions
// are not visible in this header.
// NOTE(review): the class's closing "};" is absent from this listing
// (presumably dropped during extraction) -- confirm against upstream.
56 class DistillerFactoryImpl : public DistillerFactory {
57 public:
// |distiller_url_fetcher_factory| is taken by value as a scoped_ptr and
// |dom_distiller_options| by const reference. Presumably both end up in the
// private members of the same names -- confirm in the definition.
58 DistillerFactoryImpl(
59 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory,
60 const dom_distiller::proto::DomDistillerOptions& dom_distiller_options);
61 virtual ~DistillerFactoryImpl();
// DistillerFactory implementation.
62 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE;
64 private:
// URL fetcher factory held through a scoped_ptr.
65 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_;
// Options stored by value (a copy, not a reference).
66 dom_distiller::proto::DomDistillerOptions dom_distiller_options_;
69 // Distills an article from a page and associated pages.
// Concrete Distiller: overrides DistillPage(). Every member function is only
// declared here; the definitions are not visible in this header.
// NOTE(review): this listing looks like a web-viewer extract -- the leading
// integer on each line is the viewer's line number, and the closing "};" of
// both this class and the nested DistilledPageData struct are absent from the
// listing. Confirm against the upstream header.
70 class DistillerImpl : public Distiller {
71 public:
// |distiller_url_fetcher_factory| is taken by const reference, and the class
// keeps a const-reference member of the same type, so the factory presumably
// has to outlive this object -- confirm in the constructor definition.
72 DistillerImpl(
73 const DistillerURLFetcherFactory& distiller_url_fetcher_factory,
74 const dom_distiller::proto::DomDistillerOptions& dom_distiller_options);
75 virtual ~DistillerImpl();
// Distiller implementation.
77 virtual void DistillPage(
78 const GURL& url,
79 scoped_ptr<DistillerPage> distiller_page,
80 const DistillationFinishedCallback& finished_cb,
81 const DistillationUpdateCallback& update_cb) OVERRIDE;
// Presumably sets |max_pages_in_article_| -- confirm in the definition.
83 void SetMaxNumPagesInArticle(size_t max_num_pages);
85 private:
86 // In case of multiple pages, the Distiller maintains state of multiple pages
87 // as page numbers relative to the page number where distillation started.
88 // E.g. if distillation starts at page 2 for a 3 page article. The relative
89 // page numbers assigned to pages will be [-1,0,1].
91 // Class representing the state of a page under distillation.
// Copy and assignment are meant to be disabled, per the
// DISALLOW_COPY_AND_ASSIGN macro in its private section.
92 struct DistilledPageData {
93 DistilledPageData();
94 virtual ~DistilledPageData();
95 // Relative page number of the page.
96 int page_num;
// ScopedVector of DistillerURLFetcher pointers; presumably the fetchers for
// this page's images -- confirm against FetchImage()/OnFetchImageDone().
97 ScopedVector<DistillerURLFetcher> image_fetchers_;
// Ref-counted wrapper (base::RefCountedData) around this page's
// DistilledPageProto.
98 scoped_refptr<base::RefCountedData<DistilledPageProto> >
99 distilled_page_proto;
101 private:
102 DISALLOW_COPY_AND_ASSIGN(DistilledPageData);
// Presumably the completion handler for an image fetch started by
// FetchImage(). |url_fetcher| is a raw pointer; who owns it is not evident
// from this declaration.
105 void OnFetchImageDone(int page_num,
106 DistillerURLFetcher* url_fetcher,
107 const std::string& id,
108 const std::string& response);
// Presumably the completion handler for distilling a single page.
// |distilled_page| is taken by value as a scoped_ptr; |distillation_successful|
// carries the outcome.
110 void OnPageDistillationFinished(int page_num,
111 const GURL& page_url,
112 scoped_ptr<DistilledPageInfo> distilled_page,
113 bool distillation_successful);
// Declared virtual, unlike the other private helpers in this class.
// NOTE(review): presumably an override point for subclasses or tests --
// confirm.
115 virtual void FetchImage(int page_num,
116 const std::string& image_id,
117 const std::string& item);
119 // Distills the next page.
120 void DistillNextPage();
122 // Adds the |url| to |pages_to_be_distilled| if |page_num| is a valid relative
123 // page number and |url| is valid. Ignores duplicate pages and urls.
// NOTE(review): no member named |pages_to_be_distilled| is declared in this
// class; the comment presumably means |waiting_pages_| -- confirm.
124 void AddToDistillationQueue(int page_num, const GURL& url);
126 // Check if |page_num| is a valid relative page number, i.e. page with
127 // |page_num| is either under distillation or has already completed
128 // distillation.
129 bool IsPageNumberInUse(int page_num) const;
131 bool AreAllPagesFinished() const;
133 // Total number of pages in the article that the distiller knows of, this
134 // includes pages that are pending distillation.
135 size_t TotalPageCount() const;
137 // Runs |finished_cb_| if all distillation callbacks and image fetches are
138 // complete.
139 void RunDistillerCallbackIfDone();
141 // Checks if the page with relative page number |page_num| has finished
142 // distillation, including all image fetches.
143 void AddPageIfDone(int page_num);
// Returns a DistilledPageData pointer for |index|; presumably an index into
// |pages_| -- confirm in the definition.
145 DistilledPageData* GetPageAtIndex(size_t index) const;
147 // Create an ArticleDistillationUpdate for the current distillation
148 // state.
149 const ArticleDistillationUpdate CreateDistillationUpdate() const;
// Held by const reference rather than by value or smart pointer.
151 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_;
// Presumably the page object handed to DistillPage() -- confirm.
152 scoped_ptr<DistillerPage> distiller_page_;
// Options stored by value (a copy, not a reference).
154 dom_distiller::proto::DomDistillerOptions dom_distiller_options_;
// Presumably the callbacks handed to DistillPage() -- confirm.
155 DistillationFinishedCallback finished_cb_;
156 DistillationUpdateCallback update_cb_;
158 // Pages that are under distillation or have finished distillation.
159 // |started_pages_index_| and |finished_pages_index_| maintain the mapping
160 // from page number to the indices in |pages_|.
161 ScopedVector<DistilledPageData> pages_;
163 // Maps page numbers of finished pages to the indices in |pages_|.
164 std::map<int, size_t> finished_pages_index_;
166 // Maps page numbers of pages under distillation to the indices in |pages_|.
167 // If a page is in |started_pages_index_| that means it is still waiting for
168 // an action (distillation or image fetch) to finish.
169 base::hash_map<int, size_t> started_pages_index_;
171 // The list of pages that are still waiting for distillation to start.
172 // This is a map, to make distiller prefer distilling lower page numbers
173 // first.
174 std::map<int, GURL> waiting_pages_;
176 // Set to keep track of which urls are already seen by the distiller. Used to
177 // prevent distiller from distilling the same url twice.
178 base::hash_set<std::string> seen_urls_;
// Presumably the cap on pages per article set via SetMaxNumPagesInArticle() --
// confirm.
180 size_t max_pages_in_article_;
// NOTE(review): purpose is not evident from this header; check where the
// definitions read and write it.
182 bool destruction_allowed_;
// Factory for weak pointers to this DistillerImpl.
184 base::WeakPtrFactory<DistillerImpl> weak_factory_;
186 DISALLOW_COPY_AND_ASSIGN(DistillerImpl);
189 } // namespace dom_distiller
191 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_