1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/precache/core/precache_fetcher.h"
10 #include "base/bind.h"
11 #include "base/callback.h"
12 #include "base/command_line.h"
13 #include "base/compiler_specific.h"
14 #include "base/containers/hash_tables.h"
15 #include "base/metrics/histogram_macros.h"
16 #include "components/precache/core/precache_switches.h"
17 #include "components/precache/core/proto/precache.pb.h"
18 #include "net/base/completion_callback.h"
19 #include "net/base/escape.h"
20 #include "net/base/io_buffer.h"
21 #include "net/base/load_flags.h"
22 #include "net/base/net_errors.h"
23 #include "net/http/http_response_headers.h"
24 #include "net/url_request/url_fetcher_response_writer.h"
25 #include "net/url_request/url_request_context_getter.h"
26 #include "net/url_request/url_request_status.h"
28 using net::URLFetcher
;
// Load flags OR-ed into every URLFetcher request issued in this file so that
// cookies are neither sent with the request nor stored from the response.
32 // LOAD_DO_NOT_*_COOKIES is for privacy reasons. If a user clears their
33 // cookies, but a tracking beacon is prefetched and the beacon specifies its
34 // source URL in a URL param, the beacon site would be able to rebuild a
35 // profile of the user.
36 const int kNoCookies
=
37 net::LOAD_DO_NOT_SAVE_COOKIES
| net::LOAD_DO_NOT_SEND_COOKIES
;
// Upper bound (500 MiB) passed as the maximum to the
// Precache.Fetch.ResponseBytes.* UMA_HISTOGRAM_CUSTOM_COUNTS calls below.
41 // The maximum for the Precache.Fetch.ResponseBytes.* histograms. We set this to
42 // a number we expect to be in the 99th percentile for the histogram, give or
// take.
44 const int kMaxResponseBytes
= 500 * 1024 * 1024;
// Returns the default URL of the precache configuration settings.
//
// Sources, in the order they are consulted:
//   1. The switches::kPrecacheConfigSettingsURL command-line switch.
//   2. The PRECACHE_CONFIG_SETTINGS_URL macro, when defined at build time.
//   3. Otherwise an empty, invalid GURL (per the trailing comment).
//
// NOTE(review): several lines of this function (the return wrapping the
// switch value, the #endif, the final return and the closing braces) are not
// visible in this extract -- confirm against the full file before changing
// the code.
46 GURL
GetDefaultConfigURL() {
47 const base::CommandLine
& command_line
=
48 *base::CommandLine::ForCurrentProcess();
// The command-line switch is checked before the compiled-in default.
49 if (command_line
.HasSwitch(switches::kPrecacheConfigSettingsURL
)) {
51 command_line
.GetSwitchValueASCII(switches::kPrecacheConfigSettingsURL
));
54 #if defined(PRECACHE_CONFIG_SETTINGS_URL)
55 return GURL(PRECACHE_CONFIG_SETTINGS_URL
);
57 // The precache config settings URL could not be determined, so return an
58 // empty, invalid GURL.
// Returns the default prefix used to build precache manifest URLs.
//
// Sources, in the order they are consulted:
//   1. The switches::kPrecacheManifestURLPrefix command-line switch.
//   2. The PRECACHE_MANIFEST_URL_PREFIX macro, when defined at build time.
//
// NOTE(review): the tail of this function (the #endif, the fallback return
// and the closing braces) is not visible in this extract; presumably the
// fallback is an empty string, mirroring GetDefaultConfigURL() -- confirm.
63 std::string
GetDefaultManifestURLPrefix() {
64 const base::CommandLine
& command_line
=
65 *base::CommandLine::ForCurrentProcess();
// The command-line switch is checked before the compiled-in default.
66 if (command_line
.HasSwitch(switches::kPrecacheManifestURLPrefix
)) {
67 return command_line
.GetSwitchValueASCII(
68 switches::kPrecacheManifestURLPrefix
);
71 #if defined(PRECACHE_MANIFEST_URL_PREFIX)
72 return PRECACHE_MANIFEST_URL_PREFIX
;
74 // The precache manifest URL prefix could not be determined, so return an
80 // Construct the URL of the precache manifest for the given name (either host or
81 // URL). The server is expecting a request for a URL consisting of the manifest
82 // URL prefix followed by the doubly escaped name.
//
// |prefix| is the manifest URL prefix and |name| is the host or URL whose
// manifest is wanted. Returns |prefix| with the escaped |name| appended; the
// result is a plain string, not a validated GURL.
83 std::string
ConstructManifestURL(const std::string
& prefix
,
84 const std::string
& name
) {
// The inner call escapes |name| once; the outer call escapes that result
// again, producing the doubly escaped form the server expects.
85 return prefix
+ net::EscapeQueryParamValue(
86 net::EscapeQueryParamValue(name
, false), false);
89 // Attempts to parse a protobuf message from the response string of a
90 // URLFetcher. If parsing is successful, the message parameter will contain the
91 // parsed protobuf and this function will return true. Otherwise, returns false.
//
// |source| is the completed fetcher to read from; |message| is the output
// protobuf to populate. Each failure path below logs a DLOG(WARNING) naming
// the original URL.
// NOTE(review): the return statements and closing braces are not visible in
// this extract.
92 bool ParseProtoFromFetchResponse(const URLFetcher
& source
,
93 ::google::protobuf::MessageLite
* message
) {
94 std::string response_string
;
// Failure 1: the fetch itself did not succeed.
96 if (!source
.GetStatus().is_success()) {
97 DLOG(WARNING
) << "Fetch failed: " << source
.GetOriginalURL().spec();
// Failure 2: the fetcher cannot provide the response body as a string.
100 if (!source
.GetResponseAsString(&response_string
)) {
101 DLOG(WARNING
) << "No response string present: "
102 << source
.GetOriginalURL().spec();
// Failure 3: the body could not be parsed into |message|.
105 if (!message
->ParseFromString(response_string
)) {
106 DLOG(WARNING
) << "Unable to parse proto served from "
107 << source
.GetOriginalURL().spec();
113 // URLFetcherResponseWriter that ignores the response body, in order to avoid
114 // the unnecessary memory usage. Use it rather than the default if you don't
115 // care about parsing the response body. We use it below as a means to populate
116 // the cache with requested resource URLs.
//
// Overrides the three net::URLFetcherResponseWriter hooks: Initialize(),
// Write() and Finish().
// NOTE(review): the method bodies (and any access specifier) are not visible
// in this extract; presumably each one discards its input and reports
// success synchronously -- confirm against the full file.
117 class URLFetcherNullWriter
: public net::URLFetcherResponseWriter
{
// Called before any data is written.
119 int Initialize(const net::CompletionCallback
& callback
) override
{
// Called with each chunk of response data in |buffer|.
123 int Write(net::IOBuffer
* buffer
,
125 const net::CompletionCallback
& callback
) override
{
// Called once the response has been fully delivered.
129 int Finish(const net::CompletionCallback
& callback
) override
{
// Constructs a Fetcher that handles a single URL.
//
// |request_context| is the context later passed to each URLFetcher.
// |callback| receives the finished URLFetcher.
// |is_resource_request| distinguishes resource fetches from config/manifest
// fetches.
//
// NOTE(review): one parameter line, some member initializers and the
// statements guarded by the is_resource_request_ check are not visible in
// this extract. Presumably resource requests begin with LoadFromCache() and
// all other requests go straight to LoadFromNetwork() -- confirm.
136 PrecacheFetcher::Fetcher::Fetcher(
137 net::URLRequestContextGetter
* request_context
,
139 const base::Callback
<void(const URLFetcher
&)>& callback
,
140 bool is_resource_request
)
141 : request_context_(request_context
),
144 is_resource_request_(is_resource_request
),
146 network_response_bytes_(0) {
147 if (is_resource_request_
)
// Destructor with an empty body; no explicit cleanup is performed here.
153 PrecacheFetcher::Fetcher::~Fetcher() {}
// Starts the cache stage: issues a GET for url_ that may only be served from
// the cache (LOAD_ONLY_FROM_CACHE), without cookies, and discards the body
// through a URLFetcherNullWriter. |this| is passed to URLFetcher::Create, so
// the result is reported to OnURLFetchComplete().
155 void PrecacheFetcher::Fetcher::LoadFromCache() {
// Remember the stage so OnURLFetchComplete() can tell which request finished.
156 fetch_stage_
= FetchStage::CACHE
;
157 url_fetcher_cache_
= URLFetcher::Create(url_
, URLFetcher::GET
, this);
158 url_fetcher_cache_
->SetRequestContext(request_context_
);
159 url_fetcher_cache_
->SetLoadFlags(net::LOAD_ONLY_FROM_CACHE
| kNoCookies
);
// Ownership of the writer is handed to the fetcher via Pass().
160 scoped_ptr
<URLFetcherNullWriter
> null_writer(new URLFetcherNullWriter
);
161 url_fetcher_cache_
->SaveResponseWithWriter(null_writer
.Pass());
162 url_fetcher_cache_
->Start();
// Starts the network stage: issues a GET for url_ without cookies.
// Resource requests additionally set LOAD_VALIDATE_CACHE and discard the body;
// other (config/manifest) requests only set the no-cookie flags. |this| is
// passed to URLFetcher::Create, so the result is reported to
// OnURLFetchComplete().
// NOTE(review): the line separating the two branches (presumably
// "} else {") and the closing braces are not visible in this extract.
165 void PrecacheFetcher::Fetcher::LoadFromNetwork() {
// Remember the stage so OnURLFetchComplete() can tell which request finished.
166 fetch_stage_
= FetchStage::NETWORK
;
167 url_fetcher_network_
= URLFetcher::Create(url_
, URLFetcher::GET
, this);
168 url_fetcher_network_
->SetRequestContext(request_context_
);
169 if (is_resource_request_
) {
170 // LOAD_VALIDATE_CACHE allows us to refresh Date headers for resources
171 // already in the cache. The Date headers are updated from 304s as well as
173 url_fetcher_network_
->SetLoadFlags(net::LOAD_VALIDATE_CACHE
| kNoCookies
);
174 // We don't need a copy of the response body for resource requests. The
175 // request is issued only to populate the browser cache.
176 scoped_ptr
<URLFetcherNullWriter
> null_writer(new URLFetcherNullWriter
);
177 url_fetcher_network_
->SaveResponseWithWriter(null_writer
.Pass());
179 // Config and manifest requests do not need to be revalidated. It's okay if
180 // they expire from the cache minutes after we request them.
181 url_fetcher_network_
->SetLoadFlags(kNoCookies
);
183 url_fetcher_network_
->Start();
// Completion handler for both the cache-stage and network-stage fetchers.
//
// If the finished request was the cache stage and it either missed the cache
// (ERR_CACHE_MISS) or returned headers with strong validators, the comments
// below indicate that a network request is made instead of finishing.
// Otherwise the byte counts are recorded from |source| and callback_ is run
// with it.
// NOTE(review): the statements inside the first branch (the network request
// and early return) are not visible in this extract.
186 void PrecacheFetcher::Fetcher::OnURLFetchComplete(const URLFetcher
* source
) {
187 if (fetch_stage_
== FetchStage::CACHE
&&
188 (source
->GetStatus().error() == net::ERR_CACHE_MISS
||
// GetResponseHeaders() is null-checked before it is dereferenced.
189 (source
->GetResponseHeaders() &&
190 source
->GetResponseHeaders()->HasStrongValidators()))) {
191 // If the resource was not found in the cache, request it from the
194 // If the resource was found in the cache, but contains validators,
195 // request a refresh. The presence of validators increases the chance that
196 // we get a 304 response rather than a full one, thus allowing us to
197 // refresh the cache with minimal network load.
199 // TODO(twifkak): Add support for weak validators, which should be just as
200 // likely a guarantee that the response will be a 304.
206 // - The request was for a config or manifest.
207 // - The resource was a cache hit without validators.
208 // - The response came from the network.
209 // Then Fetcher is done with this URL and can return control to the caller.
// Record the sizes before handing |source| to the owner's callback.
210 response_bytes_
= source
->GetReceivedResponseContentLength();
211 network_response_bytes_
= source
->GetTotalReceivedBytes();
212 callback_
.Run(*source
);
// Constructs a PrecacheFetcher; no network activity is started here.
//
// |starting_hosts|: hosts whose manifests are candidates for fetching.
// |request_context|: request context for all fetches; DCHECKed non-NULL.
// |config_url|: config settings URL; Start() falls back to
//     GetDefaultConfigURL() when it is empty.
// |manifest_url_prefix|: manifest URL prefix; OnConfigFetchComplete() falls
//     back to GetDefaultManifestURLPrefix() when it is empty.
// |precache_delegate|: notified via OnDone() at the end of the cycle;
//     DCHECKed non-NULL.
//
// The byte and manifest counters all start at zero.
// NOTE(review): the embedded line numbers skip around the DCHECK_NE lines, so
// they may be wrapped in preprocessor conditionals that are not visible here
// -- confirm.
215 PrecacheFetcher::PrecacheFetcher(
216 const std::vector
<std::string
>& starting_hosts
,
217 net::URLRequestContextGetter
* request_context
,
218 const GURL
& config_url
,
219 const std::string
& manifest_url_prefix
,
220 PrecacheFetcher::PrecacheDelegate
* precache_delegate
)
221 : starting_hosts_(starting_hosts
),
222 request_context_(request_context
),
223 config_url_(config_url
),
224 manifest_url_prefix_(manifest_url_prefix
),
225 precache_delegate_(precache_delegate
),
226 total_response_bytes_(0),
227 network_response_bytes_(0),
228 num_manifest_urls_to_fetch_(0) {
229 DCHECK(request_context_
.get()); // Request context must be non-NULL.
230 DCHECK(precache_delegate_
); // Precache delegate must be non-NULL.
// Debug-only checks that the default config URL and manifest prefix resolve.
232 DCHECK_NE(GURL(), GetDefaultConfigURL())
233 << "Could not determine the precache config settings URL.";
234 DCHECK_NE(std::string(), GetDefaultManifestURLPrefix())
235 << "Could not determine the default precache manifest URL prefix.";
// Records UMA histograms describing the precache cycle: the percentage of
// manifests completed, and the total and network response byte counts.
// NOTE(review): parts of the ternary expression and of the histogram macro
// argument lists are not visible in this extract.
238 PrecacheFetcher::~PrecacheFetcher() {
239 // Number of manifests for which we have downloaded all resources.
// Computed as the total number of manifests minus those still queued.
240 int manifests_completed
=
241 num_manifest_urls_to_fetch_
- manifest_urls_to_fetch_
.size();
243 // If there are resource URLs left to fetch, the last manifest is not yet
245 if (!resource_urls_to_fetch_
.empty())
246 --manifests_completed
;
248 DCHECK_GE(manifests_completed
, 0);
// The == 0 test guards the division below when no manifests were scheduled.
249 int percent_completed
= num_manifest_urls_to_fetch_
== 0
251 : (static_cast<double>(manifests_completed
) /
252 num_manifest_urls_to_fetch_
* 100);
253 UMA_HISTOGRAM_PERCENTAGE("Precache.Fetch.PercentCompleted",
255 UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total",
256 total_response_bytes_
, 1, kMaxResponseBytes
, 100);
257 UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Network",
258 network_response_bytes_
, 1, kMaxResponseBytes
,
// Begins the precache cycle by fetching the configuration settings.
// Uses config_url_ unless it is empty, in which case GetDefaultConfigURL() is
// used. Records the start time and creates a non-resource Fetcher whose
// result is delivered to OnConfigFetchComplete(). Must not be called while a
// fetcher already exists (DCHECKed).
// NOTE(review): the declaration line of the local config_url is not visible
// in this extract.
262 void PrecacheFetcher::Start() {
263 DCHECK(!fetcher_
); // Start shouldn't be called repeatedly.
266 config_url_
.is_empty() ? GetDefaultConfigURL() : config_url_
;
268 DCHECK(config_url
.is_valid()) << "Config URL not valid: "
269 << config_url
.possibly_invalid_spec();
// start_time_ is read in StartNextFetch() to compute the total fetch time.
271 start_time_
= base::TimeTicks::Now();
273 // Fetch the precache configuration settings from the server.
// NOTE(review): base::Unretained(this) presumably relies on fetcher_ being
// owned by this object and so never outliving it -- confirm.
274 fetcher_
.reset(new Fetcher(request_context_
.get(), config_url
,
275 base::Bind(&PrecacheFetcher::OnConfigFetchComplete
,
276 base::Unretained(this)),
277 false /* is_resource_request */));
// Advances the precache cycle after a fetch has finished.
//
// First adds the just-finished fetcher's byte counts to the running totals.
// Then, checking in this order:
//   1. If resource URLs are queued, fetches the front one as a resource
//      request and pops it.
//   2. Else if manifest URLs are queued, fetches the front one as a
//      non-resource request and pops it.
//   3. Otherwise records the Precache.Fetch.TimeToComplete histogram and calls
//      precache_delegate_->OnDone().
// NOTE(review): the "fetcher_.reset(" lines, the early returns after each
// branch and the closing braces are not visible in this extract.
280 void PrecacheFetcher::StartNextFetch() {
// fetcher_ still refers to the fetch that has just completed.
281 total_response_bytes_
+= fetcher_
->response_bytes();
282 network_response_bytes_
+= fetcher_
->network_response_bytes();
284 if (!resource_urls_to_fetch_
.empty()) {
285 // Fetch the next resource URL.
287 new Fetcher(request_context_
.get(), resource_urls_to_fetch_
.front(),
288 base::Bind(&PrecacheFetcher::OnResourceFetchComplete
,
289 base::Unretained(this)),
290 true /* is_resource_request */));
292 resource_urls_to_fetch_
.pop_front();
296 if (!manifest_urls_to_fetch_
.empty()) {
297 // Fetch the next manifest URL.
299 new Fetcher(request_context_
.get(), manifest_urls_to_fetch_
.front(),
300 base::Bind(&PrecacheFetcher::OnManifestFetchComplete
,
301 base::Unretained(this)),
302 false /* is_resource_request */));
304 manifest_urls_to_fetch_
.pop_front();
308 // There are no more URLs to fetch, so end the precache cycle.
309 base::TimeDelta time_to_fetch
= base::TimeTicks::Now() - start_time_
;
310 UMA_HISTOGRAM_CUSTOM_TIMES("Precache.Fetch.TimeToComplete", time_to_fetch
,
311 base::TimeDelta::FromSeconds(1),
312 base::TimeDelta::FromHours(4), 50);
314 precache_delegate_
->OnDone();
315 // OnDone may have deleted this PrecacheFetcher, so don't do anything after it
// Callback for the config fetch started by Start(). Builds the list of
// manifest URLs to fetch from starting_hosts_ and the config's forced sites.
//
// The return value of ParseProtoFromFetchResponse() is ignored here, so a
// failed parse leaves |config| as constructed. The manifest URL prefix is
// manifest_url_prefix_ unless it is empty, in which case
// GetDefaultManifestURLPrefix() is used. URLs are collected in a hash_set to
// remove duplicates before being queued.
// NOTE(review): the declaration and increment of |rank|, the statement
// guarded by the rank check (presumably a break) and the trailing call that
// starts the next fetch are not visible in this extract.
319 void PrecacheFetcher::OnConfigFetchComplete(const URLFetcher
& source
) {
320 // Attempt to parse the config proto. On failure, continue on with the default
322 PrecacheConfigurationSettings config
;
323 ParseProtoFromFetchResponse(source
, &config
);
325 std::string prefix
= manifest_url_prefix_
.empty()
326 ? GetDefaultManifestURLPrefix()
327 : manifest_url_prefix_
;
328 DCHECK_NE(std::string(), prefix
)
329 << "Could not determine the precache manifest URL prefix.";
331 // Keep track of manifest URLs that are being fetched, in order to remove
333 base::hash_set
<std::string
> unique_manifest_urls
;
335 // Attempt to fetch manifests for starting hosts up to the maximum top sites
336 // count. If a manifest does not exist for a particular starting host, then
337 // the fetch will fail, and that starting host will be ignored.
339 for (const std::string
& host
: starting_hosts_
) {
// Hosts are cut off once |rank| exceeds the configured top sites count.
341 if (rank
> config
.top_sites_count())
343 unique_manifest_urls
.insert(ConstructManifestURL(prefix
, host
));
// Forced sites from the config are added without any rank check.
346 for (const std::string
& url
: config
.forced_site())
347 unique_manifest_urls
.insert(ConstructManifestURL(prefix
, url
));
// Move the de-duplicated URLs into the fetch queue.
349 for (const std::string
& manifest_url
: unique_manifest_urls
)
350 manifest_urls_to_fetch_
.push_back(GURL(manifest_url
));
// Remember the total so the destructor can report the completed percentage.
351 num_manifest_urls_to_fetch_
= manifest_urls_to_fetch_
.size();
// Callback for a manifest fetch. If the response parses as a PrecacheManifest,
// every resource entry that has a URL is appended to resource_urls_to_fetch_;
// entries without a URL are skipped. A response that fails to parse adds
// nothing.
// NOTE(review): the closing braces and any trailing call that starts the next
// fetch are not visible in this extract.
356 void PrecacheFetcher::OnManifestFetchComplete(const URLFetcher
& source
) {
357 PrecacheManifest manifest
;
359 if (ParseProtoFromFetchResponse(source
, &manifest
)) {
360 for (int i
= 0; i
< manifest
.resource_size(); ++i
) {
361 if (manifest
.resource(i
).has_url()) {
362 resource_urls_to_fetch_
.push_back(GURL(manifest
.resource(i
).url()));
// Callback for a resource fetch; |source| needs no processing here.
// NOTE(review): the rest of the body is not visible in this extract;
// presumably it just continues the cycle via StartNextFetch() -- confirm.
370 void PrecacheFetcher::OnResourceFetchComplete(const URLFetcher
& source
) {
371 // The resource has already been put in the cache during the fetch process, so
372 // nothing more needs to be done for the resource.
376 } // namespace precache