Update path of checkdeps to buildtools checkout
[chromium-blink-merge.git] / extensions / browser / content_hash_fetcher.cc
blob053f704433c07cf731f0171c1a4bb2084db584a8
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "extensions/browser/content_hash_fetcher.h"
7 #include <algorithm>
9 #include "base/base64.h"
10 #include "base/file_util.h"
11 #include "base/files/file_enumerator.h"
12 #include "base/json/json_reader.h"
13 #include "base/memory/ref_counted.h"
14 #include "base/stl_util.h"
15 #include "base/synchronization/lock.h"
16 #include "base/task_runner_util.h"
17 #include "base/version.h"
18 #include "content/public/browser/browser_context.h"
19 #include "content/public/browser/browser_thread.h"
20 #include "crypto/secure_hash.h"
21 #include "crypto/sha2.h"
22 #include "extensions/browser/computed_hashes.h"
23 #include "extensions/browser/extension_registry.h"
24 #include "extensions/common/constants.h"
25 #include "extensions/common/extension.h"
26 #include "extensions/common/file_util.h"
27 #include "net/base/load_flags.h"
28 #include "net/url_request/url_fetcher.h"
29 #include "net/url_request/url_fetcher_delegate.h"
30 #include "net/url_request/url_request_status.h"
32 namespace {
34 typedef std::set<base::FilePath> SortedFilePathSet;
36 } // namespace
38 namespace extensions {
40 // This class takes care of doing the disk and network I/O work to ensure we
41 // have both verified_contents.json files from the webstore and
42 // computed_hashes.json files computed over the files in an extension's
43 // directory.
44 class ContentHashFetcherJob
45 : public base::RefCountedThreadSafe<ContentHashFetcherJob>,
46 public net::URLFetcherDelegate {
47 public:
48 typedef base::Callback<void(ContentHashFetcherJob*)> CompletionCallback;
49 ContentHashFetcherJob(net::URLRequestContextGetter* request_context,
50 const std::string& extension_id,
51 const base::FilePath& extension_path,
52 const GURL& fetch_url,
53 const CompletionCallback& callback);
55 void Start();
57 // Cancels this job, which will attempt to stop I/O operations sooner than
58 // just waiting for the entire job to complete. Safe to call from any thread.
59 void Cancel();
61 // Returns whether this job was completely successful (we have both verified
62 // contents and computed hashes).
63 bool success() { return success_; }
65 // Do we have a verified_contents.json file?
66 bool have_verified_contents() { return have_verified_contents_; }
68 private:
69 friend class base::RefCountedThreadSafe<ContentHashFetcherJob>;
70 virtual ~ContentHashFetcherJob();
72 // Checks whether this job has been cancelled. Safe to call from any thread.
73 bool IsCancelled();
75 // Callback for when we're done doing file I/O to see if we already have
76 // a verified contents file. If we don't, this will kick off a network
77 // request to get one.
78 void DoneCheckingForVerifiedContents(bool found);
80 // URLFetcherDelegate interface
81 virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;
83 // Callback for when we're done ensuring we have verified contents, and are
84 // ready to move on to MaybeCreateHashes.
85 void DoneFetchingVerifiedContents(bool success);
87 // Callback for the job to write the verified contents to the filesystem.
88 void OnVerifiedContentsWritten(size_t expected_size, int write_result);
90 // The verified contents file from the webstore only contains the treehash
91 // root hash, but for performance we want to cache the individual block level
92 // hashes. This function will create that cache with block-level hashes for
93 // each file in the extension if needed (the treehash root hash for each of
94 // these should equal what is in the verified contents file from the
95 // webstore).
96 void MaybeCreateHashes();
98 // Computes hashes for all files in |extension_path_|, and uses a
99 // ComputedHashes::Writer to write that information into
100 // |hashes_file|. Returns true on success.
101 bool CreateHashes(const base::FilePath& hashes_file);
103 // Will call the callback, if we haven't been cancelled.
104 void DispatchCallback();
106 net::URLRequestContextGetter* request_context_;
107 std::string extension_id_;
108 base::FilePath extension_path_;
110 // The url we'll need to use to fetch a verified_contents.json file.
111 GURL fetch_url_;
113 CompletionCallback callback_;
114 content::BrowserThread::ID creation_thread_;
116 // Used for fetching content signatures.
117 scoped_ptr<net::URLFetcher> url_fetcher_;
119 // Whether this job succeeded.
120 bool success_;
122 // Whether we either found a verified contents file, or were successful in
123 // fetching one and saving it to disk.
124 bool have_verified_contents_;
126 // The block size to use for hashing.
127 int block_size_;
129 // Note: this may be accessed from multiple threads, so all access should
130 // be protected by |cancelled_lock_|.
131 bool cancelled_;
133 // A lock for synchronizing access to |cancelled_|.
134 base::Lock cancelled_lock_;
137 ContentHashFetcherJob::ContentHashFetcherJob(
138 net::URLRequestContextGetter* request_context,
139 const std::string& extension_id,
140 const base::FilePath& extension_path,
141 const GURL& fetch_url,
142 const CompletionCallback& callback)
143 : request_context_(request_context),
144 extension_id_(extension_id),
145 extension_path_(extension_path),
146 fetch_url_(fetch_url),
147 callback_(callback),
148 success_(false),
149 have_verified_contents_(false),
150 // TODO(asargent) - use the value from verified_contents.json for each
151 // file, instead of using a constant.
152 block_size_(4096),
153 cancelled_(false) {
154 bool got_id =
155 content::BrowserThread::GetCurrentThreadIdentifier(&creation_thread_);
156 DCHECK(got_id);
159 void ContentHashFetcherJob::Start() {
160 base::FilePath verified_contents_path =
161 file_util::GetVerifiedContentsPath(extension_path_);
162 base::PostTaskAndReplyWithResult(
163 content::BrowserThread::GetBlockingPool(),
164 FROM_HERE,
165 base::Bind(&base::PathExists, verified_contents_path),
166 base::Bind(&ContentHashFetcherJob::DoneCheckingForVerifiedContents,
167 this));
170 void ContentHashFetcherJob::Cancel() {
171 base::AutoLock autolock(cancelled_lock_);
172 cancelled_ = true;
175 ContentHashFetcherJob::~ContentHashFetcherJob() {
178 bool ContentHashFetcherJob::IsCancelled() {
179 base::AutoLock autolock(cancelled_lock_);
180 bool result = cancelled_;
181 return result;
184 void ContentHashFetcherJob::DoneCheckingForVerifiedContents(bool found) {
185 if (IsCancelled())
186 return;
187 if (found) {
188 DoneFetchingVerifiedContents(true);
189 } else {
190 url_fetcher_.reset(
191 net::URLFetcher::Create(fetch_url_, net::URLFetcher::GET, this));
192 url_fetcher_->SetRequestContext(request_context_);
193 url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
194 net::LOAD_DO_NOT_SAVE_COOKIES |
195 net::LOAD_DISABLE_CACHE);
196 url_fetcher_->SetAutomaticallyRetryOnNetworkChanges(3);
197 url_fetcher_->Start();
201 // Helper function to let us pass ownership of a string via base::Bind with the
202 // contents to be written into a file. Also ensures that the directory for
203 // |path| exists, creating it if needed.
204 static int WriteFileHelper(const base::FilePath& path,
205 scoped_ptr<std::string> content) {
206 base::FilePath dir = path.DirName();
207 return (base::CreateDirectoryAndGetError(dir, NULL) &&
208 base::WriteFile(path, content->data(), content->size()));
211 void ContentHashFetcherJob::OnURLFetchComplete(const net::URLFetcher* source) {
212 if (IsCancelled())
213 return;
214 scoped_ptr<std::string> response(new std::string);
215 if (!url_fetcher_->GetStatus().is_success() ||
216 !url_fetcher_->GetResponseAsString(response.get())) {
217 DoneFetchingVerifiedContents(false);
218 return;
221 // Parse the response to make sure it is valid json (on staging sometimes it
222 // can be a login redirect html, xml file, etc. if you aren't logged in with
223 // the right cookies). TODO(asargent) - It would be a nice enhancement to
224 // move to parsing this in a sandboxed helper (crbug.com/372878).
225 scoped_ptr<base::Value> parsed(base::JSONReader::Read(*response));
226 if (parsed) {
227 parsed.reset(); // no longer needed
228 base::FilePath destination =
229 file_util::GetVerifiedContentsPath(extension_path_);
230 size_t size = response->size();
231 base::PostTaskAndReplyWithResult(
232 content::BrowserThread::GetBlockingPool(),
233 FROM_HERE,
234 base::Bind(&WriteFileHelper, destination, base::Passed(&response)),
235 base::Bind(
236 &ContentHashFetcherJob::OnVerifiedContentsWritten, this, size));
237 } else {
238 DoneFetchingVerifiedContents(false);
242 void ContentHashFetcherJob::OnVerifiedContentsWritten(size_t expected_size,
243 int write_result) {
244 bool success =
245 (write_result >= 0 && static_cast<size_t>(write_result) == expected_size);
246 DoneFetchingVerifiedContents(success);
249 void ContentHashFetcherJob::DoneFetchingVerifiedContents(bool success) {
250 have_verified_contents_ = success;
252 if (IsCancelled())
253 return;
255 // TODO(asargent) - eventually we should abort here on !success, but for
256 // testing purposes it's actually still helpful to continue on to create the
257 // computed hashes.
259 content::BrowserThread::PostBlockingPoolSequencedTask(
260 "ContentHashFetcher",
261 FROM_HERE,
262 base::Bind(&ContentHashFetcherJob::MaybeCreateHashes, this));
265 void ContentHashFetcherJob::MaybeCreateHashes() {
266 if (IsCancelled())
267 return;
268 base::FilePath hashes_file =
269 file_util::GetComputedHashesPath(extension_path_);
271 if (base::PathExists(hashes_file))
272 success_ = true;
273 else
274 success_ = CreateHashes(hashes_file);
276 content::BrowserThread::PostTask(
277 creation_thread_,
278 FROM_HERE,
279 base::Bind(&ContentHashFetcherJob::DispatchCallback, this));
282 bool ContentHashFetcherJob::CreateHashes(const base::FilePath& hashes_file) {
283 if (IsCancelled())
284 return false;
285 // Make sure the directory exists.
286 if (!base::CreateDirectoryAndGetError(hashes_file.DirName(), NULL))
287 return false;
289 base::FileEnumerator enumerator(extension_path_,
290 true, /* recursive */
291 base::FileEnumerator::FILES);
292 // First discover all the file paths and put them in a sorted set.
293 SortedFilePathSet paths;
294 for (;;) {
295 if (IsCancelled())
296 return false;
298 base::FilePath full_path = enumerator.Next();
299 if (full_path.empty())
300 break;
301 paths.insert(full_path);
304 // Now iterate over all the paths in sorted order and compute the block hashes
305 // for each one.
306 ComputedHashes::Writer writer;
307 for (SortedFilePathSet::iterator i = paths.begin(); i != paths.end(); ++i) {
308 if (IsCancelled())
309 return false;
310 const base::FilePath& full_path = *i;
311 base::FilePath relative_path;
312 extension_path_.AppendRelativePath(full_path, &relative_path);
313 std::string contents;
314 if (!base::ReadFileToString(full_path, &contents)) {
315 LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
316 continue;
319 // Iterate through taking the hash of each block of size (block_size_) of
320 // the file.
321 std::vector<std::string> hashes;
322 size_t offset = 0;
323 while (offset < contents.size()) {
324 if (IsCancelled())
325 return false;
326 const char* block_start = contents.data() + offset;
327 size_t bytes_to_read =
328 std::min(contents.size() - offset, static_cast<size_t>(block_size_));
329 DCHECK(bytes_to_read > 0);
330 scoped_ptr<crypto::SecureHash> hash(
331 crypto::SecureHash::Create(crypto::SecureHash::SHA256));
332 hash->Update(block_start, bytes_to_read);
334 hashes.push_back(std::string());
335 std::string* buffer = &hashes.back();
336 buffer->resize(crypto::kSHA256Length);
337 hash->Finish(string_as_array(buffer), buffer->size());
339 // Get ready for next iteration.
340 offset += bytes_to_read;
342 writer.AddHashes(relative_path, block_size_, hashes);
344 return writer.WriteToFile(hashes_file);
347 void ContentHashFetcherJob::DispatchCallback() {
349 base::AutoLock autolock(cancelled_lock_);
350 if (cancelled_)
351 return;
353 callback_.Run(this);
356 // ----
358 ContentHashFetcher::ContentHashFetcher(content::BrowserContext* context,
359 ContentVerifierDelegate* delegate)
360 : context_(context),
361 delegate_(delegate),
362 observer_(this),
363 weak_ptr_factory_(this) {
366 ContentHashFetcher::~ContentHashFetcher() {
367 for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
368 i->second->Cancel();
372 void ContentHashFetcher::Start() {
373 ExtensionRegistry* registry = ExtensionRegistry::Get(context_);
374 observer_.Add(registry);
377 void ContentHashFetcher::DoFetch(const Extension* extension) {
378 if (!extension || !delegate_->ShouldBeVerified(*extension))
379 return;
381 IdAndVersion key(extension->id(), extension->version()->GetString());
382 if (ContainsKey(jobs_, key))
383 return;
385 // TODO(asargent) - we should do something here to remember recent attempts
386 // to fetch signatures by extension id, and use exponential backoff to avoid
387 // hammering the server when we aren't successful in getting them.
388 // crbug.com/373397
390 DCHECK(extension->version());
391 GURL url =
392 delegate_->GetSignatureFetchUrl(extension->id(), *extension->version());
393 ContentHashFetcherJob* job =
394 new ContentHashFetcherJob(context_->GetRequestContext(),
395 extension->id(),
396 extension->path(),
397 url,
398 base::Bind(&ContentHashFetcher::JobFinished,
399 weak_ptr_factory_.GetWeakPtr()));
400 jobs_.insert(std::make_pair(key, job));
401 job->Start();
404 void ContentHashFetcher::OnExtensionLoaded(
405 content::BrowserContext* browser_context,
406 const Extension* extension) {
407 CHECK(extension);
408 DoFetch(extension);
411 void ContentHashFetcher::OnExtensionUnloaded(
412 content::BrowserContext* browser_context,
413 const Extension* extension,
414 UnloadedExtensionInfo::Reason reason) {
415 CHECK(extension);
416 IdAndVersion key(extension->id(), extension->version()->GetString());
417 JobMap::iterator found = jobs_.find(key);
418 if (found != jobs_.end())
419 jobs_.erase(found);
422 void ContentHashFetcher::JobFinished(ContentHashFetcherJob* job) {
423 for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
424 if (i->second.get() == job) {
425 jobs_.erase(i);
426 break;
431 } // namespace extensions