Roll src/third_party/WebKit 9f7fb92:f103b33 (svn 202621:202622)
[chromium-blink-merge.git] / components / precache / core / precache_fetcher.cc
blob65d598a2de8336fd905bcdd8bde35c94e7c2a47a
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/precache/core/precache_fetcher.h"
7 #include <string>
8 #include <vector>
10 #include "base/bind.h"
11 #include "base/callback.h"
12 #include "base/command_line.h"
13 #include "base/compiler_specific.h"
14 #include "base/containers/hash_tables.h"
15 #include "base/metrics/histogram_macros.h"
16 #include "components/precache/core/precache_switches.h"
17 #include "components/precache/core/proto/precache.pb.h"
18 #include "net/base/completion_callback.h"
19 #include "net/base/escape.h"
20 #include "net/base/io_buffer.h"
21 #include "net/base/load_flags.h"
22 #include "net/base/net_errors.h"
23 #include "net/http/http_response_headers.h"
24 #include "net/url_request/url_fetcher_response_writer.h"
25 #include "net/url_request/url_request_context_getter.h"
26 #include "net/url_request/url_request_status.h"
28 using net::URLFetcher;
30 namespace precache {
32 // The following flags are for privacy reasons. For example, if a user clears
33 // their cookies, but a tracking beacon is prefetched and the beacon specifies
34 // its source URL in a URL param, the beacon site would be able to rebuild a
35 // profile of the user. All three flags should occur together, or not at all,
36 // per
37 // https://groups.google.com/a/chromium.org/d/topic/net-dev/vvcodRV6SdM/discussion.
38 const int kNoTracking =
39 net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES |
40 net::LOAD_DO_NOT_SEND_AUTH_DATA;
42 namespace {
// Upper bound, in bytes, used for the Precache.Fetch.ResponseBytes.*
// histograms (500 MiB). We set this to a number we expect to be in the 99th
// percentile for the histogram, give or take.
const int kMaxResponseBytes = 500 * 1024 * 1024;
49 GURL GetDefaultConfigURL() {
50 const base::CommandLine& command_line =
51 *base::CommandLine::ForCurrentProcess();
52 if (command_line.HasSwitch(switches::kPrecacheConfigSettingsURL)) {
53 return GURL(
54 command_line.GetSwitchValueASCII(switches::kPrecacheConfigSettingsURL));
57 #if defined(PRECACHE_CONFIG_SETTINGS_URL)
58 return GURL(PRECACHE_CONFIG_SETTINGS_URL);
59 #else
60 // The precache config settings URL could not be determined, so return an
61 // empty, invalid GURL.
62 return GURL();
63 #endif
66 std::string GetDefaultManifestURLPrefix() {
67 const base::CommandLine& command_line =
68 *base::CommandLine::ForCurrentProcess();
69 if (command_line.HasSwitch(switches::kPrecacheManifestURLPrefix)) {
70 return command_line.GetSwitchValueASCII(
71 switches::kPrecacheManifestURLPrefix);
74 #if defined(PRECACHE_MANIFEST_URL_PREFIX)
75 return PRECACHE_MANIFEST_URL_PREFIX;
76 #else
77 // The precache manifest URL prefix could not be determined, so return an
78 // empty string.
79 return std::string();
80 #endif
83 // Construct the URL of the precache manifest for the given name (either host or
84 // URL). The server is expecting a request for a URL consisting of the manifest
85 // URL prefix followed by the doubly escaped name.
86 std::string ConstructManifestURL(const std::string& prefix,
87 const std::string& name) {
88 return prefix + net::EscapeQueryParamValue(
89 net::EscapeQueryParamValue(name, false), false);
92 // Attempts to parse a protobuf message from the response string of a
93 // URLFetcher. If parsing is successful, the message parameter will contain the
94 // parsed protobuf and this function will return true. Otherwise, returns false.
95 bool ParseProtoFromFetchResponse(const URLFetcher& source,
96 ::google::protobuf::MessageLite* message) {
97 std::string response_string;
99 if (!source.GetStatus().is_success()) {
100 DLOG(WARNING) << "Fetch failed: " << source.GetOriginalURL().spec();
101 return false;
103 if (!source.GetResponseAsString(&response_string)) {
104 DLOG(WARNING) << "No response string present: "
105 << source.GetOriginalURL().spec();
106 return false;
108 if (!message->ParseFromString(response_string)) {
109 DLOG(WARNING) << "Unable to parse proto served from "
110 << source.GetOriginalURL().spec();
111 return false;
113 return true;
116 // URLFetcherResponseWriter that ignores the response body, in order to avoid
117 // the unnecessary memory usage. Use it rather than the default if you don't
118 // care about parsing the response body. We use it below as a means to populate
119 // the cache with requested resource URLs.
120 class URLFetcherNullWriter : public net::URLFetcherResponseWriter {
121 public:
122 int Initialize(const net::CompletionCallback& callback) override {
123 return net::OK;
126 int Write(net::IOBuffer* buffer,
127 int num_bytes,
128 const net::CompletionCallback& callback) override {
129 return num_bytes;
132 int Finish(const net::CompletionCallback& callback) override {
133 return net::OK;
137 } // namespace
139 PrecacheFetcher::Fetcher::Fetcher(
140 net::URLRequestContextGetter* request_context,
141 const GURL& url,
142 const base::Callback<void(const URLFetcher&)>& callback,
143 bool is_resource_request)
144 : request_context_(request_context),
145 url_(url),
146 callback_(callback),
147 is_resource_request_(is_resource_request),
148 response_bytes_(0),
149 network_response_bytes_(0) {
150 if (is_resource_request_)
151 LoadFromCache();
152 else
153 LoadFromNetwork();
156 PrecacheFetcher::Fetcher::~Fetcher() {}
158 void PrecacheFetcher::Fetcher::LoadFromCache() {
159 fetch_stage_ = FetchStage::CACHE;
160 url_fetcher_cache_ = URLFetcher::Create(url_, URLFetcher::GET, this);
161 url_fetcher_cache_->SetRequestContext(request_context_);
162 url_fetcher_cache_->SetLoadFlags(net::LOAD_ONLY_FROM_CACHE | kNoTracking);
163 scoped_ptr<URLFetcherNullWriter> null_writer(new URLFetcherNullWriter);
164 url_fetcher_cache_->SaveResponseWithWriter(null_writer.Pass());
165 url_fetcher_cache_->Start();
168 void PrecacheFetcher::Fetcher::LoadFromNetwork() {
169 fetch_stage_ = FetchStage::NETWORK;
170 url_fetcher_network_ = URLFetcher::Create(url_, URLFetcher::GET, this);
171 url_fetcher_network_->SetRequestContext(request_context_);
172 if (is_resource_request_) {
173 // LOAD_VALIDATE_CACHE allows us to refresh Date headers for resources
174 // already in the cache. The Date headers are updated from 304s as well as
175 // 200s.
176 url_fetcher_network_->SetLoadFlags(net::LOAD_VALIDATE_CACHE | kNoTracking);
177 // We don't need a copy of the response body for resource requests. The
178 // request is issued only to populate the browser cache.
179 scoped_ptr<URLFetcherNullWriter> null_writer(new URLFetcherNullWriter);
180 url_fetcher_network_->SaveResponseWithWriter(null_writer.Pass());
181 } else {
182 // Config and manifest requests do not need to be revalidated. It's okay if
183 // they expire from the cache minutes after we request them.
184 url_fetcher_network_->SetLoadFlags(kNoTracking);
186 url_fetcher_network_->Start();
189 void PrecacheFetcher::Fetcher::OnURLFetchComplete(const URLFetcher* source) {
190 if (fetch_stage_ == FetchStage::CACHE &&
191 (source->GetStatus().error() == net::ERR_CACHE_MISS ||
192 (source->GetResponseHeaders() &&
193 source->GetResponseHeaders()->HasStrongValidators()))) {
194 // If the resource was not found in the cache, request it from the
195 // network.
197 // If the resource was found in the cache, but contains validators,
198 // request a refresh. The presence of validators increases the chance that
199 // we get a 304 response rather than a full one, thus allowing us to
200 // refresh the cache with minimal network load.
202 // TODO(twifkak): Add support for weak validators, which should be just as
203 // likely a guarantee that the response will be a 304.
204 LoadFromNetwork();
205 return;
208 // If any of:
209 // - The request was for a config or manifest.
210 // - The resource was a cache hit without validators.
211 // - The response came from the network.
212 // Then Fetcher is done with this URL and can return control to the caller.
213 response_bytes_ = source->GetReceivedResponseContentLength();
214 network_response_bytes_ = source->GetTotalReceivedBytes();
215 callback_.Run(*source);
218 PrecacheFetcher::PrecacheFetcher(
219 const std::vector<std::string>& starting_hosts,
220 net::URLRequestContextGetter* request_context,
221 const GURL& config_url,
222 const std::string& manifest_url_prefix,
223 PrecacheFetcher::PrecacheDelegate* precache_delegate)
224 : starting_hosts_(starting_hosts),
225 request_context_(request_context),
226 config_url_(config_url),
227 manifest_url_prefix_(manifest_url_prefix),
228 precache_delegate_(precache_delegate),
229 total_response_bytes_(0),
230 network_response_bytes_(0),
231 num_manifest_urls_to_fetch_(0) {
232 DCHECK(request_context_.get()); // Request context must be non-NULL.
233 DCHECK(precache_delegate_); // Precache delegate must be non-NULL.
235 DCHECK_NE(GURL(), GetDefaultConfigURL())
236 << "Could not determine the precache config settings URL.";
237 DCHECK_NE(std::string(), GetDefaultManifestURLPrefix())
238 << "Could not determine the default precache manifest URL prefix.";
241 PrecacheFetcher::~PrecacheFetcher() {
242 // Number of manifests for which we have downloaded all resources.
243 int manifests_completed =
244 num_manifest_urls_to_fetch_ - manifest_urls_to_fetch_.size();
246 // If there are resource URLs left to fetch, the last manifest is not yet
247 // completed.
248 if (!resource_urls_to_fetch_.empty())
249 --manifests_completed;
251 DCHECK_GE(manifests_completed, 0);
252 int percent_completed = num_manifest_urls_to_fetch_ == 0
254 : (static_cast<double>(manifests_completed) /
255 num_manifest_urls_to_fetch_ * 100);
256 UMA_HISTOGRAM_PERCENTAGE("Precache.Fetch.PercentCompleted",
257 percent_completed);
258 UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total",
259 total_response_bytes_, 1, kMaxResponseBytes, 100);
260 UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Network",
261 network_response_bytes_, 1, kMaxResponseBytes,
262 100);
265 void PrecacheFetcher::Start() {
266 DCHECK(!fetcher_); // Start shouldn't be called repeatedly.
268 GURL config_url =
269 config_url_.is_empty() ? GetDefaultConfigURL() : config_url_;
271 DCHECK(config_url.is_valid()) << "Config URL not valid: "
272 << config_url.possibly_invalid_spec();
274 start_time_ = base::TimeTicks::Now();
276 // Fetch the precache configuration settings from the server.
277 fetcher_.reset(new Fetcher(request_context_.get(), config_url,
278 base::Bind(&PrecacheFetcher::OnConfigFetchComplete,
279 base::Unretained(this)),
280 false /* is_resource_request */));
283 void PrecacheFetcher::StartNextFetch() {
284 total_response_bytes_ += fetcher_->response_bytes();
285 network_response_bytes_ += fetcher_->network_response_bytes();
287 if (!resource_urls_to_fetch_.empty()) {
288 // Fetch the next resource URL.
289 fetcher_.reset(
290 new Fetcher(request_context_.get(), resource_urls_to_fetch_.front(),
291 base::Bind(&PrecacheFetcher::OnResourceFetchComplete,
292 base::Unretained(this)),
293 true /* is_resource_request */));
295 resource_urls_to_fetch_.pop_front();
296 return;
299 if (!manifest_urls_to_fetch_.empty()) {
300 // Fetch the next manifest URL.
301 fetcher_.reset(
302 new Fetcher(request_context_.get(), manifest_urls_to_fetch_.front(),
303 base::Bind(&PrecacheFetcher::OnManifestFetchComplete,
304 base::Unretained(this)),
305 false /* is_resource_request */));
307 manifest_urls_to_fetch_.pop_front();
308 return;
311 // There are no more URLs to fetch, so end the precache cycle.
312 base::TimeDelta time_to_fetch = base::TimeTicks::Now() - start_time_;
313 UMA_HISTOGRAM_CUSTOM_TIMES("Precache.Fetch.TimeToComplete", time_to_fetch,
314 base::TimeDelta::FromSeconds(1),
315 base::TimeDelta::FromHours(4), 50);
317 precache_delegate_->OnDone();
318 // OnDone may have deleted this PrecacheFetcher, so don't do anything after it
319 // is called.
322 void PrecacheFetcher::OnConfigFetchComplete(const URLFetcher& source) {
323 // Attempt to parse the config proto. On failure, continue on with the default
324 // configuration.
325 PrecacheConfigurationSettings config;
326 ParseProtoFromFetchResponse(source, &config);
328 std::string prefix = manifest_url_prefix_.empty()
329 ? GetDefaultManifestURLPrefix()
330 : manifest_url_prefix_;
331 DCHECK_NE(std::string(), prefix)
332 << "Could not determine the precache manifest URL prefix.";
334 // Keep track of manifest URLs that are being fetched, in order to remove
335 // duplicates.
336 base::hash_set<std::string> unique_manifest_urls;
338 // Attempt to fetch manifests for starting hosts up to the maximum top sites
339 // count. If a manifest does not exist for a particular starting host, then
340 // the fetch will fail, and that starting host will be ignored.
341 int64 rank = 0;
342 for (const std::string& host : starting_hosts_) {
343 ++rank;
344 if (rank > config.top_sites_count())
345 break;
346 unique_manifest_urls.insert(ConstructManifestURL(prefix, host));
349 for (const std::string& url : config.forced_site())
350 unique_manifest_urls.insert(ConstructManifestURL(prefix, url));
352 for (const std::string& manifest_url : unique_manifest_urls)
353 manifest_urls_to_fetch_.push_back(GURL(manifest_url));
354 num_manifest_urls_to_fetch_ = manifest_urls_to_fetch_.size();
356 StartNextFetch();
359 void PrecacheFetcher::OnManifestFetchComplete(const URLFetcher& source) {
360 PrecacheManifest manifest;
362 if (ParseProtoFromFetchResponse(source, &manifest)) {
363 for (int i = 0; i < manifest.resource_size(); ++i) {
364 if (manifest.resource(i).has_url()) {
365 resource_urls_to_fetch_.push_back(GURL(manifest.resource(i).url()));
370 StartNextFetch();
373 void PrecacheFetcher::OnResourceFetchComplete(const URLFetcher& source) {
374 // The resource has already been put in the cache during the fetch process, so
375 // nothing more needs to be done for the resource.
376 StartNextFetch();
379 } // namespace precache