[chromedriver] Explicitly set page loading state when document.readyState != "complete".
[chromium-blink-merge.git] / extensions / browser / content_hash_fetcher.cc
blob8f8b496ecf9ba3c6599bf722d5c0e9fc8b6af00a
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "extensions/browser/content_hash_fetcher.h"
7 #include <algorithm>
9 #include "base/base64.h"
10 #include "base/files/file_enumerator.h"
11 #include "base/files/file_util.h"
12 #include "base/json/json_reader.h"
13 #include "base/memory/ref_counted.h"
14 #include "base/metrics/histogram.h"
15 #include "base/synchronization/lock.h"
16 #include "base/task_runner_util.h"
17 #include "base/timer/elapsed_timer.h"
18 #include "base/version.h"
19 #include "content/public/browser/browser_context.h"
20 #include "content/public/browser/browser_thread.h"
21 #include "crypto/sha2.h"
22 #include "extensions/browser/computed_hashes.h"
23 #include "extensions/browser/content_hash_tree.h"
24 #include "extensions/browser/content_verifier_delegate.h"
25 #include "extensions/browser/verified_contents.h"
26 #include "extensions/common/constants.h"
27 #include "extensions/common/extension.h"
28 #include "extensions/common/file_util.h"
29 #include "net/base/load_flags.h"
30 #include "net/url_request/url_fetcher.h"
31 #include "net/url_request/url_fetcher_delegate.h"
32 #include "net/url_request/url_request_status.h"
34 namespace {
36 typedef std::set<base::FilePath> SortedFilePathSet;
38 } // namespace
40 namespace extensions {
42 // This class takes care of doing the disk and network I/O work to ensure we
43 // have both verified_contents.json files from the webstore and
44 // computed_hashes.json files computed over the files in an extension's
45 // directory.
46 class ContentHashFetcherJob
47 : public base::RefCountedThreadSafe<ContentHashFetcherJob>,
48 public net::URLFetcherDelegate {
49 public:
50 typedef base::Callback<void(ContentHashFetcherJob*)> CompletionCallback;
51 ContentHashFetcherJob(net::URLRequestContextGetter* request_context,
52 const ContentVerifierKey& key,
53 const std::string& extension_id,
54 const base::FilePath& extension_path,
55 const GURL& fetch_url,
56 bool force,
57 const CompletionCallback& callback);
59 void Start();
61 // Cancels this job, which will attempt to stop I/O operations sooner than
62 // just waiting for the entire job to complete. Safe to call from any thread.
63 void Cancel();
65 // Checks whether this job has been cancelled. Safe to call from any thread.
66 bool IsCancelled();
68 // Returns whether this job was successful (we have both verified contents
69 // and computed hashes). Even if the job was a success, there might have been
70 // files that were found to have contents not matching expectations; these
71 // are available by calling hash_mismatch_paths().
72 bool success() { return success_; }
74 bool force() { return force_; }
76 const std::string& extension_id() { return extension_id_; }
78 // Returns the set of paths that had a hash mismatch.
79 const std::set<base::FilePath>& hash_mismatch_paths() {
80 return hash_mismatch_paths_;
83 private:
84 friend class base::RefCountedThreadSafe<ContentHashFetcherJob>;
85 ~ContentHashFetcherJob() override;
87 // Tries to load a verified_contents.json file at |path|. On successfully
88 // reading and validing the file, the verified_contents_ member variable will
89 // be set and this function will return true. If the file does not exist, or
90 // exists but is invalid, it will return false. Also, any invalid
91 // file will be removed from disk and
92 bool LoadVerifiedContents(const base::FilePath& path);
94 // Callback for when we're done doing file I/O to see if we already have
95 // a verified contents file. If we don't, this will kick off a network
96 // request to get one.
97 void DoneCheckingForVerifiedContents(bool found);
99 // URLFetcherDelegate interface
100 void OnURLFetchComplete(const net::URLFetcher* source) override;
102 // Callback for when we're done ensuring we have verified contents, and are
103 // ready to move on to MaybeCreateHashes.
104 void DoneFetchingVerifiedContents(bool success);
106 // Callback for the job to write the verified contents to the filesystem.
107 void OnVerifiedContentsWritten(size_t expected_size, int write_result);
109 // The verified contents file from the webstore only contains the treehash
110 // root hash, but for performance we want to cache the individual block level
111 // hashes. This function will create that cache with block-level hashes for
112 // each file in the extension if needed (the treehash root hash for each of
113 // these should equal what is in the verified contents file from the
114 // webstore).
115 void MaybeCreateHashes();
117 // Computes hashes for all files in |extension_path_|, and uses a
118 // ComputedHashes::Writer to write that information into
119 // |hashes_file|. Returns true on success.
120 bool CreateHashes(const base::FilePath& hashes_file);
122 // Will call the callback, if we haven't been cancelled.
123 void DispatchCallback();
125 net::URLRequestContextGetter* request_context_;
126 std::string extension_id_;
127 base::FilePath extension_path_;
129 // The url we'll need to use to fetch a verified_contents.json file.
130 GURL fetch_url_;
132 bool force_;
134 CompletionCallback callback_;
135 content::BrowserThread::ID creation_thread_;
137 // Used for fetching content signatures.
138 scoped_ptr<net::URLFetcher> url_fetcher_;
140 // The key used to validate verified_contents.json.
141 ContentVerifierKey key_;
143 // The parsed contents of the verified_contents.json file, either read from
144 // disk or fetched from the network and then written to disk.
145 scoped_ptr<VerifiedContents> verified_contents_;
147 // Whether this job succeeded.
148 bool success_;
150 // Paths that were found to have a mismatching hash.
151 std::set<base::FilePath> hash_mismatch_paths_;
153 // The block size to use for hashing.
154 int block_size_;
156 // Note: this may be accessed from multiple threads, so all access should
157 // be protected by |cancelled_lock_|.
158 bool cancelled_;
160 // A lock for synchronizing access to |cancelled_|.
161 base::Lock cancelled_lock_;
163 DISALLOW_COPY_AND_ASSIGN(ContentHashFetcherJob);
166 ContentHashFetcherJob::ContentHashFetcherJob(
167 net::URLRequestContextGetter* request_context,
168 const ContentVerifierKey& key,
169 const std::string& extension_id,
170 const base::FilePath& extension_path,
171 const GURL& fetch_url,
172 bool force,
173 const CompletionCallback& callback)
174 : request_context_(request_context),
175 extension_id_(extension_id),
176 extension_path_(extension_path),
177 fetch_url_(fetch_url),
178 force_(force),
179 callback_(callback),
180 key_(key),
181 success_(false),
182 // TODO(asargent) - use the value from verified_contents.json for each
183 // file, instead of using a constant.
184 block_size_(4096),
185 cancelled_(false) {
186 bool got_id =
187 content::BrowserThread::GetCurrentThreadIdentifier(&creation_thread_);
188 DCHECK(got_id);
191 void ContentHashFetcherJob::Start() {
192 base::FilePath verified_contents_path =
193 file_util::GetVerifiedContentsPath(extension_path_);
194 base::PostTaskAndReplyWithResult(
195 content::BrowserThread::GetBlockingPool(),
196 FROM_HERE,
197 base::Bind(&ContentHashFetcherJob::LoadVerifiedContents,
198 this,
199 verified_contents_path),
200 base::Bind(&ContentHashFetcherJob::DoneCheckingForVerifiedContents,
201 this));
204 void ContentHashFetcherJob::Cancel() {
205 base::AutoLock autolock(cancelled_lock_);
206 cancelled_ = true;
209 bool ContentHashFetcherJob::IsCancelled() {
210 base::AutoLock autolock(cancelled_lock_);
211 bool result = cancelled_;
212 return result;
215 ContentHashFetcherJob::~ContentHashFetcherJob() {
218 bool ContentHashFetcherJob::LoadVerifiedContents(const base::FilePath& path) {
219 if (!base::PathExists(path))
220 return false;
221 verified_contents_.reset(new VerifiedContents(key_.data, key_.size));
222 if (!verified_contents_->InitFrom(path, false)) {
223 verified_contents_.reset();
224 if (!base::DeleteFile(path, false))
225 LOG(WARNING) << "Failed to delete " << path.value();
226 return false;
228 return true;
231 void ContentHashFetcherJob::DoneCheckingForVerifiedContents(bool found) {
232 if (IsCancelled())
233 return;
234 if (found) {
235 VLOG(1) << "Found verified contents for " << extension_id_;
236 DoneFetchingVerifiedContents(true);
237 } else {
238 VLOG(1) << "Missing verified contents for " << extension_id_
239 << ", fetching...";
240 url_fetcher_ =
241 net::URLFetcher::Create(fetch_url_, net::URLFetcher::GET, this);
242 url_fetcher_->SetRequestContext(request_context_);
243 url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
244 net::LOAD_DO_NOT_SAVE_COOKIES |
245 net::LOAD_DISABLE_CACHE);
246 url_fetcher_->SetAutomaticallyRetryOnNetworkChanges(3);
247 url_fetcher_->Start();
251 // Helper function to let us pass ownership of a string via base::Bind with the
252 // contents to be written into a file. Also ensures that the directory for
253 // |path| exists, creating it if needed.
254 static int WriteFileHelper(const base::FilePath& path,
255 scoped_ptr<std::string> content) {
256 base::FilePath dir = path.DirName();
257 return (base::CreateDirectoryAndGetError(dir, NULL) &&
258 base::WriteFile(path, content->data(), content->size()));
261 void ContentHashFetcherJob::OnURLFetchComplete(const net::URLFetcher* source) {
262 VLOG(1) << "URLFetchComplete for " << extension_id_
263 << " is_success:" << url_fetcher_->GetStatus().is_success() << " "
264 << fetch_url_.possibly_invalid_spec();
265 if (IsCancelled())
266 return;
267 scoped_ptr<std::string> response(new std::string);
268 if (!url_fetcher_->GetStatus().is_success() ||
269 !url_fetcher_->GetResponseAsString(response.get())) {
270 DoneFetchingVerifiedContents(false);
271 return;
274 // Parse the response to make sure it is valid json (on staging sometimes it
275 // can be a login redirect html, xml file, etc. if you aren't logged in with
276 // the right cookies). TODO(asargent) - It would be a nice enhancement to
277 // move to parsing this in a sandboxed helper (crbug.com/372878).
278 scoped_ptr<base::Value> parsed(base::JSONReader::Read(*response));
279 if (parsed) {
280 VLOG(1) << "JSON parsed ok for " << extension_id_;
282 parsed.reset(); // no longer needed
283 base::FilePath destination =
284 file_util::GetVerifiedContentsPath(extension_path_);
285 size_t size = response->size();
286 base::PostTaskAndReplyWithResult(
287 content::BrowserThread::GetBlockingPool(),
288 FROM_HERE,
289 base::Bind(&WriteFileHelper, destination, base::Passed(&response)),
290 base::Bind(
291 &ContentHashFetcherJob::OnVerifiedContentsWritten, this, size));
292 } else {
293 DoneFetchingVerifiedContents(false);
297 void ContentHashFetcherJob::OnVerifiedContentsWritten(size_t expected_size,
298 int write_result) {
299 bool success =
300 (write_result >= 0 && static_cast<size_t>(write_result) == expected_size);
301 DoneFetchingVerifiedContents(success);
304 void ContentHashFetcherJob::DoneFetchingVerifiedContents(bool success) {
305 if (IsCancelled())
306 return;
308 if (!success) {
309 DispatchCallback();
310 return;
313 content::BrowserThread::PostBlockingPoolSequencedTask(
314 "ContentHashFetcher",
315 FROM_HERE,
316 base::Bind(&ContentHashFetcherJob::MaybeCreateHashes, this));
319 void ContentHashFetcherJob::MaybeCreateHashes() {
320 if (IsCancelled())
321 return;
322 base::FilePath hashes_file =
323 file_util::GetComputedHashesPath(extension_path_);
325 if (!force_ && base::PathExists(hashes_file)) {
326 success_ = true;
327 } else {
328 if (force_)
329 base::DeleteFile(hashes_file, false /* recursive */);
330 success_ = CreateHashes(hashes_file);
333 content::BrowserThread::PostTask(
334 creation_thread_,
335 FROM_HERE,
336 base::Bind(&ContentHashFetcherJob::DispatchCallback, this));
339 bool ContentHashFetcherJob::CreateHashes(const base::FilePath& hashes_file) {
340 base::ElapsedTimer timer;
341 if (IsCancelled())
342 return false;
343 // Make sure the directory exists.
344 if (!base::CreateDirectoryAndGetError(hashes_file.DirName(), NULL))
345 return false;
347 if (!verified_contents_.get()) {
348 base::FilePath verified_contents_path =
349 file_util::GetVerifiedContentsPath(extension_path_);
350 verified_contents_.reset(new VerifiedContents(key_.data, key_.size));
351 if (!verified_contents_->InitFrom(verified_contents_path, false))
352 return false;
353 verified_contents_.reset();
356 base::FileEnumerator enumerator(extension_path_,
357 true, /* recursive */
358 base::FileEnumerator::FILES);
359 // First discover all the file paths and put them in a sorted set.
360 SortedFilePathSet paths;
361 for (;;) {
362 if (IsCancelled())
363 return false;
365 base::FilePath full_path = enumerator.Next();
366 if (full_path.empty())
367 break;
368 paths.insert(full_path);
371 // Now iterate over all the paths in sorted order and compute the block hashes
372 // for each one.
373 ComputedHashes::Writer writer;
374 for (SortedFilePathSet::iterator i = paths.begin(); i != paths.end(); ++i) {
375 if (IsCancelled())
376 return false;
377 const base::FilePath& full_path = *i;
378 base::FilePath relative_path;
379 extension_path_.AppendRelativePath(full_path, &relative_path);
380 relative_path = relative_path.NormalizePathSeparatorsTo('/');
382 if (!verified_contents_->HasTreeHashRoot(relative_path))
383 continue;
385 std::string contents;
386 if (!base::ReadFileToString(full_path, &contents)) {
387 LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
388 continue;
391 // Iterate through taking the hash of each block of size (block_size_) of
392 // the file.
393 std::vector<std::string> hashes;
394 ComputedHashes::ComputeHashesForContent(contents, block_size_, &hashes);
395 std::string root =
396 ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length);
397 if (!verified_contents_->TreeHashRootEquals(relative_path, root)) {
398 VLOG(1) << "content mismatch for " << relative_path.AsUTF8Unsafe();
399 hash_mismatch_paths_.insert(relative_path);
400 continue;
403 writer.AddHashes(relative_path, block_size_, hashes);
405 bool result = writer.WriteToFile(hashes_file);
406 UMA_HISTOGRAM_TIMES("ExtensionContentHashFetcher.CreateHashesTime",
407 timer.Elapsed());
408 return result;
411 void ContentHashFetcherJob::DispatchCallback() {
413 base::AutoLock autolock(cancelled_lock_);
414 if (cancelled_)
415 return;
417 callback_.Run(this);
420 // ----
422 ContentHashFetcher::ContentHashFetcher(content::BrowserContext* context,
423 ContentVerifierDelegate* delegate,
424 const FetchCallback& callback)
425 : context_(context),
426 delegate_(delegate),
427 fetch_callback_(callback),
428 weak_ptr_factory_(this) {
431 ContentHashFetcher::~ContentHashFetcher() {
432 for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
433 i->second->Cancel();
437 void ContentHashFetcher::DoFetch(const Extension* extension, bool force) {
438 DCHECK(extension);
440 IdAndVersion key(extension->id(), extension->version()->GetString());
441 JobMap::iterator found = jobs_.find(key);
442 if (found != jobs_.end()) {
443 if (!force || found->second->force()) {
444 // Just let the existing job keep running.
445 return;
446 } else {
447 // Kill the existing non-force job, so we can start a new one below.
448 found->second->Cancel();
449 jobs_.erase(found);
453 // TODO(asargent) - we should do something here to remember recent attempts
454 // to fetch signatures by extension id, and use exponential backoff to avoid
455 // hammering the server when we aren't successful in getting them.
456 // crbug.com/373397
458 DCHECK(extension->version());
459 GURL url =
460 delegate_->GetSignatureFetchUrl(extension->id(), *extension->version());
461 ContentHashFetcherJob* job = new ContentHashFetcherJob(
462 context_->GetRequestContext(), delegate_->GetPublicKey(), extension->id(),
463 extension->path(), url, force,
464 base::Bind(&ContentHashFetcher::JobFinished,
465 weak_ptr_factory_.GetWeakPtr()));
466 jobs_.insert(std::make_pair(key, job));
467 job->Start();
470 void ContentHashFetcher::ExtensionLoaded(const Extension* extension) {
471 CHECK(extension);
472 DoFetch(extension, false);
475 void ContentHashFetcher::ExtensionUnloaded(const Extension* extension) {
476 CHECK(extension);
477 IdAndVersion key(extension->id(), extension->version()->GetString());
478 JobMap::iterator found = jobs_.find(key);
479 if (found != jobs_.end()) {
480 found->second->Cancel();
481 jobs_.erase(found);
485 void ContentHashFetcher::JobFinished(ContentHashFetcherJob* job) {
486 if (!job->IsCancelled()) {
487 fetch_callback_.Run(job->extension_id(),
488 job->success(),
489 job->force(),
490 job->hash_mismatch_paths());
493 for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
494 if (i->second.get() == job) {
495 jobs_.erase(i);
496 break;
501 } // namespace extensions