1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "extensions/browser/content_hash_fetcher.h"
9 #include "base/base64.h"
10 #include "base/files/file_enumerator.h"
11 #include "base/files/file_util.h"
12 #include "base/json/json_reader.h"
13 #include "base/memory/ref_counted.h"
14 #include "base/metrics/histogram.h"
15 #include "base/synchronization/lock.h"
16 #include "base/task_runner_util.h"
17 #include "base/timer/elapsed_timer.h"
18 #include "base/version.h"
19 #include "content/public/browser/browser_context.h"
20 #include "content/public/browser/browser_thread.h"
21 #include "crypto/sha2.h"
22 #include "extensions/browser/computed_hashes.h"
23 #include "extensions/browser/content_hash_tree.h"
24 #include "extensions/browser/content_verifier_delegate.h"
25 #include "extensions/browser/verified_contents.h"
26 #include "extensions/common/constants.h"
27 #include "extensions/common/extension.h"
28 #include "extensions/common/file_util.h"
29 #include "net/base/load_flags.h"
30 #include "net/url_request/url_fetcher.h"
31 #include "net/url_request/url_fetcher_delegate.h"
32 #include "net/url_request/url_request_status.h"
36 typedef std::set
<base::FilePath
> SortedFilePathSet
;
40 namespace extensions
{
42 // This class takes care of doing the disk and network I/O work to ensure we
43 // have both verified_contents.json files from the webstore and
44 // computed_hashes.json files computed over the files in an extension's
// directory (|extension_path_|).
//
// Instances are reference counted (base::RefCountedThreadSafe) and serve as
// the net::URLFetcherDelegate for the URLFetcher they create.
46 class ContentHashFetcherJob
47 : public base::RefCountedThreadSafe
<ContentHashFetcherJob
>,
48 public net::URLFetcherDelegate
{
// Signature of the completion callback; it receives a pointer to the job.
50 typedef base::Callback
<void(ContentHashFetcherJob
*)> CompletionCallback
;
// |request_context| is handed to the URLFetcher that downloads
// verified_contents.json from |fetch_url|. |key| is used to validate that
// file. |extension_id| names the extension (used in logging and by the
// owner's callback) and |extension_path| is the directory whose files are
// hashed.
51 ContentHashFetcherJob(net::URLRequestContextGetter
* request_context
,
52 const ContentVerifierKey
& key
,
53 const std::string
& extension_id
,
54 const base::FilePath
& extension_path
,
55 const GURL
& fetch_url
,
57 const CompletionCallback
& callback
);
61 // Cancels this job, which will attempt to stop I/O operations sooner than
62 // just waiting for the entire job to complete. Safe to call from any thread.
65 // Checks whether this job has been cancelled. Safe to call from any thread.
68 // Returns whether this job was successful (we have both verified contents
69 // and computed hashes). Even if the job was a success, there might have been
70 // files that were found to have contents not matching expectations; these
71 // are available by calling hash_mismatch_paths().
72 bool success() { return success_
; }
// Returns the job's |force_| flag. When set, MaybeCreateHashes() recreates
// the computed hashes file even if one already exists.
74 bool force() { return force_
; }
// Returns the id of the extension this job is working on.
76 const std::string
& extension_id() { return extension_id_
; }
78 // Returns the set of paths that had a hash mismatch.
79 const std::set
<base::FilePath
>& hash_mismatch_paths() {
80 return hash_mismatch_paths_
;
// RefCountedThreadSafe is a friend so that it can reach the destructor.
84 friend class base::RefCountedThreadSafe
<ContentHashFetcherJob
>;
85 ~ContentHashFetcherJob() override
;
87 // Tries to load a verified_contents.json file at |path|. On successfully
88 // reading and validating the file, the verified_contents_ member variable will
89 // be set and this function will return true. If the file does not exist, or
90 // exists but is invalid, it will return false. Also, any invalid
91 // file will be removed from disk and |verified_contents_| will be reset.
92 bool LoadVerifiedContents(const base::FilePath
& path
);
94 // Callback for when we're done doing file I/O to see if we already have
95 // a verified contents file. If we don't, this will kick off a network
96 // request to get one.
97 void DoneCheckingForVerifiedContents(bool found
);
99 // URLFetcherDelegate interface
100 void OnURLFetchComplete(const net::URLFetcher
* source
) override
;
102 // Callback for when we're done ensuring we have verified contents, and are
103 // ready to move on to MaybeCreateHashes.
104 void DoneFetchingVerifiedContents(bool success
);
106 // Callback for the job to write the verified contents to the filesystem.
// |expected_size| is the size of the fetched response and |write_result| is
// the value returned by the write task; the write counts as successful only
// when the two agree.
107 void OnVerifiedContentsWritten(size_t expected_size
, int write_result
);
109 // The verified contents file from the webstore only contains the treehash
110 // root hash, but for performance we want to cache the individual block level
111 // hashes. This function will create that cache with block-level hashes for
112 // each file in the extension if needed (the treehash root hash for each of
113 // these should equal what is in the verified contents file from the
// webstore).
115 void MaybeCreateHashes();
117 // Computes hashes for all files in |extension_path_|, and uses a
118 // ComputedHashes::Writer to write that information into
119 // |hashes_file|. Returns true on success.
120 bool CreateHashes(const base::FilePath
& hashes_file
);
122 // Will call the callback, if we haven't been cancelled.
123 void DispatchCallback();
// Request context given to the URLFetcher via SetRequestContext().
125 net::URLRequestContextGetter
* request_context_
;
// Id of the extension being processed.
126 std::string extension_id_
;
// Root directory of the extension; the verified contents and computed hashes
// paths are derived from it.
127 base::FilePath extension_path_
;
129 // The url we'll need to use to fetch a verified_contents.json file.
// Completion callback type declared above; presumably holds the |callback|
// constructor argument -- TODO confirm against the full initializer list.
134 CompletionCallback callback_
;
// Identifier of the thread the job was constructed on (filled in by the
// constructor through BrowserThread::GetCurrentThreadIdentifier()).
135 content::BrowserThread::ID creation_thread_
;
137 // Used for fetching content signatures.
138 scoped_ptr
<net::URLFetcher
> url_fetcher_
;
140 // The key used to validate verified_contents.json.
141 ContentVerifierKey key_
;
143 // The parsed contents of the verified_contents.json file, either read from
144 // disk or fetched from the network and then written to disk.
145 scoped_ptr
<VerifiedContents
> verified_contents_
;
147 // Whether this job succeeded.
150 // Paths that were found to have a mismatching hash.
151 std::set
<base::FilePath
> hash_mismatch_paths_
;
153 // The block size to use for hashing.
156 // Note: this may be accessed from multiple threads, so all access should
157 // be protected by |cancelled_lock_|.
160 // A lock for synchronizing access to |cancelled_|.
161 base::Lock cancelled_lock_
;
163 DISALLOW_COPY_AND_ASSIGN(ContentHashFetcherJob
);
// Constructs a job for the extension |extension_id| located at
// |extension_path|. The initializers shown here copy |request_context|,
// |extension_id|, |extension_path| and |fetch_url| into the matching members.
166 ContentHashFetcherJob::ContentHashFetcherJob(
167 net::URLRequestContextGetter
* request_context
,
168 const ContentVerifierKey
& key
,
169 const std::string
& extension_id
,
170 const base::FilePath
& extension_path
,
171 const GURL
& fetch_url
,
173 const CompletionCallback
& callback
)
174 : request_context_(request_context
),
175 extension_id_(extension_id
),
176 extension_path_(extension_path
),
177 fetch_url_(fetch_url
),
182 // TODO(asargent) - use the value from verified_contents.json for each
183 // file, instead of using a constant.
// Record the identifier of the thread this job is being constructed on.
187 content::BrowserThread::GetCurrentThreadIdentifier(&creation_thread_
);
// Begins the job. Derives the verified contents path from |extension_path_|
// and asks the blocking pool to run LoadVerifiedContents() on it;
// DoneCheckingForVerifiedContents() is bound as the reply that receives the
// task's boolean result.
191 void ContentHashFetcherJob::Start() {
192 base::FilePath verified_contents_path
=
193 file_util::GetVerifiedContentsPath(extension_path_
);
194 base::PostTaskAndReplyWithResult(
195 content::BrowserThread::GetBlockingPool(),
197 base::Bind(&ContentHashFetcherJob::LoadVerifiedContents
,
199 verified_contents_path
),
200 base::Bind(&ContentHashFetcherJob::DoneCheckingForVerifiedContents
,
// Cancels the job. |cancelled_lock_| is held for the duration of the call so
// the cancellation state can be touched safely from any thread.
204 void ContentHashFetcherJob::Cancel() {
205 base::AutoLock
autolock(cancelled_lock_
);
// Reports whether the job has been cancelled. |cancelled_| is read into a
// local while |cancelled_lock_| is held.
209 bool ContentHashFetcherJob::IsCancelled() {
210 base::AutoLock
autolock(cancelled_lock_
);
211 bool result
= cancelled_
;
// Destructor. The class declares base::RefCountedThreadSafe as a friend so
// that it can invoke this.
215 ContentHashFetcherJob::~ContentHashFetcherJob() {
// Attempts to populate |verified_contents_| from the file at |path|.
// First checks that |path| exists. A VerifiedContents object is then built
// from |key_| and initialized from the file; if initialization fails the
// object is discarded and the file is deleted (a warning is logged when the
// deletion itself fails).
218 bool ContentHashFetcherJob::LoadVerifiedContents(const base::FilePath
& path
) {
219 if (!base::PathExists(path
))
221 verified_contents_
.reset(new VerifiedContents(key_
.data
, key_
.size
));
222 if (!verified_contents_
->InitFrom(path
, false)) {
// The file could not be used: drop the parsed object and remove the file.
223 verified_contents_
.reset();
224 if (!base::DeleteFile(path
, false))
225 LOG(WARNING
) << "Failed to delete " << path
.value();
// Reply for the disk check started in Start(). |found| says whether a usable
// verified contents file was loaded. When it was, the job proceeds via
// DoneFetchingVerifiedContents(true); otherwise a GET request for
// |fetch_url_| is started with this object as the fetcher's delegate.
231 void ContentHashFetcherJob::DoneCheckingForVerifiedContents(bool found
) {
235 VLOG(1) << "Found verified contents for " << extension_id_
;
236 DoneFetchingVerifiedContents(true);
238 VLOG(1) << "Missing verified contents for " << extension_id_
241 net::URLFetcher::Create(fetch_url_
, net::URLFetcher::GET
, this);
242 url_fetcher_
->SetRequestContext(request_context_
);
// The request neither sends nor saves cookies and bypasses the cache.
243 url_fetcher_
->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES
|
244 net::LOAD_DO_NOT_SAVE_COOKIES
|
245 net::LOAD_DISABLE_CACHE
);
246 url_fetcher_
->SetAutomaticallyRetryOnNetworkChanges(3);
247 url_fetcher_
->Start();
251 // Helper function to let us pass ownership of a string via base::Bind with the
252 // contents to be written into a file. Also ensures that the directory for
253 // |path| exists, creating it if needed.
254 static int WriteFileHelper(const base::FilePath
& path
,
255 scoped_ptr
<std::string
> content
) {
256 base::FilePath dir
= path
.DirName();
257 return (base::CreateDirectoryAndGetError(dir
, NULL
) &&
258 base::WriteFile(path
, content
->data(), content
->size()));
// net::URLFetcherDelegate callback for the verified_contents.json request.
// If the fetch failed or the body cannot be read as a string, the job
// continues with DoneFetchingVerifiedContents(false). Otherwise the body is
// parsed as JSON and handed to WriteFileHelper() on the blocking pool, with
// OnVerifiedContentsWritten() bound as the reply.
261 void ContentHashFetcherJob::OnURLFetchComplete(const net::URLFetcher
* source
) {
262 VLOG(1) << "URLFetchComplete for " << extension_id_
263 << " is_success:" << url_fetcher_
->GetStatus().is_success() << " "
264 << fetch_url_
.possibly_invalid_spec();
267 scoped_ptr
<std::string
> response(new std::string
);
268 if (!url_fetcher_
->GetStatus().is_success() ||
269 !url_fetcher_
->GetResponseAsString(response
.get())) {
270 DoneFetchingVerifiedContents(false);
274 // Parse the response to make sure it is valid json (on staging sometimes it
275 // can be a login redirect html, xml file, etc. if you aren't logged in with
276 // the right cookies). TODO(asargent) - It would be a nice enhancement to
277 // move to parsing this in a sandboxed helper (crbug.com/372878).
278 scoped_ptr
<base::Value
> parsed(base::JSONReader::Read(*response
));
280 VLOG(1) << "JSON parsed ok for " << extension_id_
;
282 parsed
.reset(); // no longer needed
283 base::FilePath destination
=
284 file_util::GetVerifiedContentsPath(extension_path_
);
// Capture the size before |response| is moved into the write task below
// (base::Passed); the reply compares it with the write result.
285 size_t size
= response
->size();
286 base::PostTaskAndReplyWithResult(
287 content::BrowserThread::GetBlockingPool(),
289 base::Bind(&WriteFileHelper
, destination
, base::Passed(&response
)),
291 &ContentHashFetcherJob::OnVerifiedContentsWritten
, this, size
));
293 DoneFetchingVerifiedContents(false);
// Reply for the write task posted by OnURLFetchComplete(). The write is
// treated as successful only when |write_result| is non-negative and equal to
// |expected_size|; the outcome is forwarded to DoneFetchingVerifiedContents().
297 void ContentHashFetcherJob::OnVerifiedContentsWritten(size_t expected_size
,
300 (write_result
>= 0 && static_cast<size_t>(write_result
) == expected_size
);
301 DoneFetchingVerifiedContents(success
);
// Called once the attempt to obtain verified contents has finished, with
// |success| telling whether it worked. Posts MaybeCreateHashes() to the
// blocking pool as a sequenced task under the "ContentHashFetcher" token name.
304 void ContentHashFetcherJob::DoneFetchingVerifiedContents(bool success
) {
313 content::BrowserThread::PostBlockingPoolSequencedTask(
314 "ContentHashFetcher",
316 base::Bind(&ContentHashFetcherJob::MaybeCreateHashes
, this));
// Ensures the computed hashes file for the extension is in place. An existing
// file is accepted unless |force_| is set; otherwise any old file is deleted
// and CreateHashes() regenerates it, with its result stored in |success_|.
// Finally DispatchCallback() is posted via BrowserThread::PostTask.
319 void ContentHashFetcherJob::MaybeCreateHashes() {
322 base::FilePath hashes_file
=
323 file_util::GetComputedHashesPath(extension_path_
);
325 if (!force_
&& base::PathExists(hashes_file
)) {
329 base::DeleteFile(hashes_file
, false /* recursive */);
330 success_
= CreateHashes(hashes_file
);
333 content::BrowserThread::PostTask(
336 base::Bind(&ContentHashFetcherJob::DispatchCallback
, this));
// Computes block hashes for the files under |extension_path_| and writes them
// to |hashes_file| through a ComputedHashes::Writer.
//
// Steps visible here: make sure the destination directory exists; load
// |verified_contents_| from disk if it is not already set; enumerate all
// files recursively into a sorted set; then, for each file, compute its block
// hashes and tree hash root, record a mismatch when the root differs from
// what |verified_contents_| expects, and add the hashes to the writer. The
// elapsed time is reported to a UMA histogram.
339 bool ContentHashFetcherJob::CreateHashes(const base::FilePath
& hashes_file
) {
340 base::ElapsedTimer timer
;
343 // Make sure the directory exists.
344 if (!base::CreateDirectoryAndGetError(hashes_file
.DirName(), NULL
))
// Verified contents may not have been loaded yet; read them from the
// extension's verified contents path.
347 if (!verified_contents_
.get()) {
348 base::FilePath verified_contents_path
=
349 file_util::GetVerifiedContentsPath(extension_path_
);
350 verified_contents_
.reset(new VerifiedContents(key_
.data
, key_
.size
));
351 if (!verified_contents_
->InitFrom(verified_contents_path
, false))
353 verified_contents_
.reset();
356 base::FileEnumerator
enumerator(extension_path_
,
357 true, /* recursive */
358 base::FileEnumerator::FILES
);
359 // First discover all the file paths and put them in a sorted set.
360 SortedFilePathSet paths
;
// An empty path returned by the enumerator is checked for below; presumably
// it marks the end of enumeration -- TODO confirm.
365 base::FilePath full_path
= enumerator
.Next();
366 if (full_path
.empty())
368 paths
.insert(full_path
);
371 // Now iterate over all the paths in sorted order and compute the block hashes
373 ComputedHashes::Writer writer
;
374 for (SortedFilePathSet::iterator i
= paths
.begin(); i
!= paths
.end(); ++i
) {
// Turn the absolute path into one relative to |extension_path_|, using '/'
// as the separator.
377 const base::FilePath
& full_path
= *i
;
378 base::FilePath relative_path
;
379 extension_path_
.AppendRelativePath(full_path
, &relative_path
);
380 relative_path
= relative_path
.NormalizePathSeparatorsTo('/');
// Files for which |verified_contents_| has no tree hash root are tested for
// here; presumably they are skipped -- TODO confirm.
382 if (!verified_contents_
->HasTreeHashRoot(relative_path
))
385 std::string contents
;
386 if (!base::ReadFileToString(full_path
, &contents
)) {
387 LOG(ERROR
) << "Could not read " << full_path
.MaybeAsASCII();
391 // Iterate through taking the hash of each block of size (block_size_) of
393 std::vector
<std::string
> hashes
;
394 ComputedHashes::ComputeHashesForContent(contents
, block_size_
, &hashes
);
// The second argument is the number of SHA-256 hashes that fit in one block.
396 ComputeTreeHashRoot(hashes
, block_size_
/ crypto::kSHA256Length
);
// Remember files whose computed root differs from the verified one.
397 if (!verified_contents_
->TreeHashRootEquals(relative_path
, root
)) {
398 VLOG(1) << "content mismatch for " << relative_path
.AsUTF8Unsafe();
399 hash_mismatch_paths_
.insert(relative_path
);
403 writer
.AddHashes(relative_path
, block_size_
, hashes
);
405 bool result
= writer
.WriteToFile(hashes_file
);
406 UMA_HISTOGRAM_TIMES("ExtensionContentHashFetcher.CreateHashesTime",
// Posted by MaybeCreateHashes() once hashing is done. Takes |cancelled_lock_|
// before consulting the cancellation state; per the class declaration, the
// callback is only invoked when the job has not been cancelled.
411 void ContentHashFetcherJob::DispatchCallback() {
413 base::AutoLock
autolock(cancelled_lock_
);
// Constructs a fetcher for |context| using |delegate|. |callback| is stored
// in |fetch_callback_| and is run from JobFinished() for jobs that were not
// cancelled. |weak_ptr_factory_| is bound to this object so that job
// completion callbacks can hold weak pointers to it.
422 ContentHashFetcher::ContentHashFetcher(content::BrowserContext
* context
,
423 ContentVerifierDelegate
* delegate
,
424 const FetchCallback
& callback
)
427 fetch_callback_(callback
),
428 weak_ptr_factory_(this) {
// Destructor. Walks every entry in |jobs_|.
// NOTE(review): presumably each outstanding job is cancelled here -- confirm
// against the loop body.
431 ContentHashFetcher::~ContentHashFetcher() {
432 for (JobMap::iterator i
= jobs_
.begin(); i
!= jobs_
.end(); ++i
) {
// Starts a ContentHashFetcherJob for |extension|, keyed by its id and version
// string. If a job for the same key already exists it is left running unless
// |force| is set and the existing job is a non-force one, in which case the
// old job is cancelled so a new one can be created. The new job is given the
// signature fetch URL and public key from |delegate_|, and is bound to
// JobFinished() through a weak pointer.
437 void ContentHashFetcher::DoFetch(const Extension
* extension
, bool force
) {
440 IdAndVersion
key(extension
->id(), extension
->version()->GetString());
441 JobMap::iterator found
= jobs_
.find(key
);
442 if (found
!= jobs_
.end()) {
443 if (!force
|| found
->second
->force()) {
444 // Just let the existing job keep running.
447 // Kill the existing non-force job, so we can start a new one below.
448 found
->second
->Cancel();
453 // TODO(asargent) - we should do something here to remember recent attempts
454 // to fetch signatures by extension id, and use exponential backoff to avoid
455 // hammering the server when we aren't successful in getting them.
458 DCHECK(extension
->version());
460 delegate_
->GetSignatureFetchUrl(extension
->id(), *extension
->version());
461 ContentHashFetcherJob
* job
= new ContentHashFetcherJob(
462 context_
->GetRequestContext(), delegate_
->GetPublicKey(), extension
->id(),
463 extension
->path(), url
, force
,
464 base::Bind(&ContentHashFetcher::JobFinished
,
465 weak_ptr_factory_
.GetWeakPtr()));
// Track the job under its (id, version) key.
466 jobs_
.insert(std::make_pair(key
, job
));
// Called when |extension| has been loaded; requests a non-forced fetch for it.
470 void ContentHashFetcher::ExtensionLoaded(const Extension
* extension
) {
472 DoFetch(extension
, false);
// Called when |extension| has been unloaded. Looks up the job registered for
// the extension's id and version string and, if one exists, cancels it.
475 void ContentHashFetcher::ExtensionUnloaded(const Extension
* extension
) {
477 IdAndVersion
key(extension
->id(), extension
->version()->GetString());
478 JobMap::iterator found
= jobs_
.find(key
);
479 if (found
!= jobs_
.end()) {
480 found
->second
->Cancel();
// Completion callback bound to every job created by DoFetch(). For a job that
// was not cancelled, |fetch_callback_| is run with the job's results (the
// extension id and the set of mismatching paths are among the arguments).
// Afterwards |jobs_| is searched for the entry whose value is |job|.
485 void ContentHashFetcher::JobFinished(ContentHashFetcherJob
* job
) {
486 if (!job
->IsCancelled()) {
487 fetch_callback_
.Run(job
->extension_id(),
490 job
->hash_mismatch_paths());
// Locate this job's entry in the map by pointer identity.
// NOTE(review): presumably the matching entry is erased -- confirm.
493 for (JobMap::iterator i
= jobs_
.begin(); i
!= jobs_
.end(); ++i
) {
494 if (i
->second
.get() == job
) {
501 } // namespace extensions