extensions/browser/content_hash_fetcher.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "extensions/browser/content_hash_fetcher.h"
   6
   7 #include <algorithm>
   8
   9 #include "base/base64.h"
  10 #include "base/file_util.h"
  11 #include "base/files/file_enumerator.h"
  12 #include "base/json/json_reader.h"
  13 #include "base/memory/ref_counted.h"
  14 #include "base/stl_util.h"
  15 #include "base/synchronization/lock.h"
  16 #include "base/task_runner_util.h"
  17 #include "base/version.h"
  18 #include "content/public/browser/browser_context.h"
  19 #include "content/public/browser/browser_thread.h"
  20 #include "crypto/secure_hash.h"
  21 #include "crypto/sha2.h"
  22 #include "extensions/browser/computed_hashes.h"
  23 #include "extensions/browser/extension_registry.h"
  24 #include "extensions/common/constants.h"
  25 #include "extensions/common/extension.h"
  26 #include "extensions/common/file_util.h"
  27 #include "net/base/load_flags.h"
  28 #include "net/url_request/url_fetcher.h"
  29 #include "net/url_request/url_fetcher_delegate.h"
  30 #include "net/url_request/url_request_status.h"
  31
namespace {

// An ordered set of file paths. CreateHashes() first collects every file it
// enumerates into one of these, and then walks the set so that files are
// hashed in sorted path order.
typedef std::set<base::FilePath> SortedFilePathSet;

}  // namespace
  37
  38 namespace extensions {
  39
  40 // This class takes care of doing the disk and network I/O work to ensure we
  41 // have both verified_contents.json files from the webstore and
  42 // computed_hashes.json files computed over the files in an extension's
  43 // directory.
  44 class ContentHashFetcherJob
  45     : public base::RefCountedThreadSafe<ContentHashFetcherJob>,
  46       public net::URLFetcherDelegate {
  47  public:
  48   typedef base::Callback<void(ContentHashFetcherJob*)> CompletionCallback;
  49   ContentHashFetcherJob(net::URLRequestContextGetter* request_context,
  50                         const std::string& extension_id,
  51                         const base::FilePath& extension_path,
  52                         const GURL& fetch_url,
  53                         const CompletionCallback& callback);
  54
  55   void Start();
  56
  57   // Cancels this job, which will attempt to stop I/O operations sooner than
  58   // just waiting for the entire job to complete. Safe to call from any thread.
  59   void Cancel();
  60
  61   // Returns whether this job was completely successful (we have both verified
  62   // contents and computed hashes).
  63   bool success() { return success_; }
  64
  65   // Do we have a verified_contents.json file?
  66   bool have_verified_contents() { return have_verified_contents_; }
  67
  68  private:
  69   friend class base::RefCountedThreadSafe<ContentHashFetcherJob>;
  70   virtual ~ContentHashFetcherJob();
  71
  72   // Checks whether this job has been cancelled. Safe to call from any thread.
  73   bool IsCancelled();
  74
  75   // Callback for when we're done doing file I/O to see if we already have
  76   // a verified contents file. If we don't, this will kick off a network
  77   // request to get one.
  78   void DoneCheckingForVerifiedContents(bool found);
  79
  80   // URLFetcherDelegate interface
  81   virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;
  82
  83   // Callback for when we're done ensuring we have verified contents, and are
  84   // ready to move on to MaybeCreateHashes.
  85   void DoneFetchingVerifiedContents(bool success);
  86
  87   // Callback for the job to write the verified contents to the filesystem.
  88   void OnVerifiedContentsWritten(size_t expected_size, int write_result);
  89
  90   // The verified contents file from the webstore only contains the treehash
  91   // root hash, but for performance we want to cache the individual block level
  92   // hashes. This function will create that cache with block-level hashes for
  93   // each file in the extension if needed (the treehash root hash for each of
  94   // these should equal what is in the verified contents file from the
  95   // webstore).
  96   void MaybeCreateHashes();
  97
  98   // Computes hashes for all files in |extension_path_|, and uses a
  99   // ComputedHashes::Writer to write that information into
 100   // |hashes_file|. Returns true on success.
 101   bool CreateHashes(const base::FilePath& hashes_file);
 102
 103   // Will call the callback, if we haven't been cancelled.
 104   void DispatchCallback();
 105
 106   net::URLRequestContextGetter* request_context_;
 107   std::string extension_id_;
 108   base::FilePath extension_path_;
 109
 110   // The url we'll need to use to fetch a verified_contents.json file.
 111   GURL fetch_url_;
 112
 113   CompletionCallback callback_;
 114   content::BrowserThread::ID creation_thread_;
 115
 116   // Used for fetching content signatures.
 117   scoped_ptr<net::URLFetcher> url_fetcher_;
 118
 119   // Whether this job succeeded.
 120   bool success_;
 121
 122   // Whether we either found a verified contents file, or were successful in
 123   // fetching one and saving it to disk.
 124   bool have_verified_contents_;
 125
 126   // The block size to use for hashing.
 127   int block_size_;
 128
 129   // Note: this may be accessed from multiple threads, so all access should
 130   // be protected by |cancelled_lock_|.
 131   bool cancelled_;
 132
 133   // A lock for synchronizing access to |cancelled_|.
 134   base::Lock cancelled_lock_;
 135 };
 136
 137 ContentHashFetcherJob::ContentHashFetcherJob(
 138     net::URLRequestContextGetter* request_context,
 139     const std::string& extension_id,
 140     const base::FilePath& extension_path,
 141     const GURL& fetch_url,
 142     const CompletionCallback& callback)
 143     : request_context_(request_context),
 144       extension_id_(extension_id),
 145       extension_path_(extension_path),
 146       fetch_url_(fetch_url),
 147       callback_(callback),
 148       success_(false),
 149       have_verified_contents_(false),
 150       // TODO(asargent) - use the value from verified_contents.json for each
 151       // file, instead of using a constant.
 152       block_size_(4096),
 153       cancelled_(false) {
 154   bool got_id =
 155       content::BrowserThread::GetCurrentThreadIdentifier(&creation_thread_);
 156   DCHECK(got_id);
 157 }
 158
 159 void ContentHashFetcherJob::Start() {
 160   base::FilePath verified_contents_path =
 161       file_util::GetVerifiedContentsPath(extension_path_);
 162   base::PostTaskAndReplyWithResult(
 163       content::BrowserThread::GetBlockingPool(),
 164       FROM_HERE,
 165       base::Bind(&base::PathExists, verified_contents_path),
 166       base::Bind(&ContentHashFetcherJob::DoneCheckingForVerifiedContents,
 167                  this));
 168 }
 169
 170 void ContentHashFetcherJob::Cancel() {
 171   base::AutoLock autolock(cancelled_lock_);
 172   cancelled_ = true;
 173 }
 174
 175 ContentHashFetcherJob::~ContentHashFetcherJob() {
 176 }
 177
 178 bool ContentHashFetcherJob::IsCancelled() {
 179   base::AutoLock autolock(cancelled_lock_);
 180   bool result = cancelled_;
 181   return result;
 182 }
 183
 184 void ContentHashFetcherJob::DoneCheckingForVerifiedContents(bool found) {
 185   if (IsCancelled())
 186     return;
 187   if (found) {
 188     DoneFetchingVerifiedContents(true);
 189   } else {
 190     url_fetcher_.reset(
 191         net::URLFetcher::Create(fetch_url_, net::URLFetcher::GET, this));
 192     url_fetcher_->SetRequestContext(request_context_);
 193     url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
 194                                net::LOAD_DO_NOT_SAVE_COOKIES |
 195                                net::LOAD_DISABLE_CACHE);
 196     url_fetcher_->SetAutomaticallyRetryOnNetworkChanges(3);
 197     url_fetcher_->Start();
 198   }
 199 }
 200
 201 // Helper function to let us pass ownership of a string via base::Bind with the
 202 // contents to be written into a file. Also ensures that the directory for
 203 // |path| exists, creating it if needed.
 204 static int WriteFileHelper(const base::FilePath& path,
 205                            scoped_ptr<std::string> content) {
 206   base::FilePath dir = path.DirName();
 207   return (base::CreateDirectoryAndGetError(dir, NULL) &&
 208           base::WriteFile(path, content->data(), content->size()));
 209 }
 210
 211 void ContentHashFetcherJob::OnURLFetchComplete(const net::URLFetcher* source) {
 212   if (IsCancelled())
 213     return;
 214   scoped_ptr<std::string> response(new std::string);
 215   if (!url_fetcher_->GetStatus().is_success() ||
 216       !url_fetcher_->GetResponseAsString(response.get())) {
 217     DoneFetchingVerifiedContents(false);
 218     return;
 219   }
 220
 221   // Parse the response to make sure it is valid json (on staging sometimes it
 222   // can be a login redirect html, xml file, etc. if you aren't logged in with
 223   // the right cookies).  TODO(asargent) - It would be a nice enhancement to
 224   // move to parsing this in a sandboxed helper (crbug.com/372878).
 225   scoped_ptr<base::Value> parsed(base::JSONReader::Read(*response));
 226   if (parsed) {
 227     parsed.reset();  // no longer needed
 228     base::FilePath destination =
 229         file_util::GetVerifiedContentsPath(extension_path_);
 230     size_t size = response->size();
 231     base::PostTaskAndReplyWithResult(
 232         content::BrowserThread::GetBlockingPool(),
 233         FROM_HERE,
 234         base::Bind(&WriteFileHelper, destination, base::Passed(&response)),
 235         base::Bind(
 236             &ContentHashFetcherJob::OnVerifiedContentsWritten, this, size));
 237   } else {
 238     DoneFetchingVerifiedContents(false);
 239   }
 240 }
 241
 242 void ContentHashFetcherJob::OnVerifiedContentsWritten(size_t expected_size,
 243                                                       int write_result) {
 244   bool success =
 245       (write_result >= 0 && static_cast<size_t>(write_result) == expected_size);
 246   DoneFetchingVerifiedContents(success);
 247 }
 248
 249 void ContentHashFetcherJob::DoneFetchingVerifiedContents(bool success) {
 250   have_verified_contents_ = success;
 251
 252   if (IsCancelled())
 253     return;
 254
 255   // TODO(asargent) - eventually we should abort here on !success, but for
 256   // testing purposes it's actually still helpful to continue on to create the
 257   // computed hashes.
 258
 259   content::BrowserThread::PostBlockingPoolSequencedTask(
 260       "ContentHashFetcher",
 261       FROM_HERE,
 262       base::Bind(&ContentHashFetcherJob::MaybeCreateHashes, this));
 263 }
 264
 265 void ContentHashFetcherJob::MaybeCreateHashes() {
 266   if (IsCancelled())
 267     return;
 268   base::FilePath hashes_file =
 269       file_util::GetComputedHashesPath(extension_path_);
 270
 271   if (base::PathExists(hashes_file))
 272     success_ = true;
 273   else
 274     success_ = CreateHashes(hashes_file);
 275
 276   content::BrowserThread::PostTask(
 277       creation_thread_,
 278       FROM_HERE,
 279       base::Bind(&ContentHashFetcherJob::DispatchCallback, this));
 280 }
 281
 282 bool ContentHashFetcherJob::CreateHashes(const base::FilePath& hashes_file) {
 283   if (IsCancelled())
 284     return false;
 285   // Make sure the directory exists.
 286   if (!base::CreateDirectoryAndGetError(hashes_file.DirName(), NULL))
 287     return false;
 288
 289   base::FileEnumerator enumerator(extension_path_,
 290                                   true, /* recursive */
 291                                   base::FileEnumerator::FILES);
 292   // First discover all the file paths and put them in a sorted set.
 293   SortedFilePathSet paths;
 294   for (;;) {
 295     if (IsCancelled())
 296       return false;
 297
 298     base::FilePath full_path = enumerator.Next();
 299     if (full_path.empty())
 300       break;
 301     paths.insert(full_path);
 302   }
 303
 304   // Now iterate over all the paths in sorted order and compute the block hashes
 305   // for each one.
 306   ComputedHashes::Writer writer;
 307   for (SortedFilePathSet::iterator i = paths.begin(); i != paths.end(); ++i) {
 308     if (IsCancelled())
 309       return false;
 310     const base::FilePath& full_path = *i;
 311     base::FilePath relative_path;
 312     extension_path_.AppendRelativePath(full_path, &relative_path);
 313     std::string contents;
 314     if (!base::ReadFileToString(full_path, &contents)) {
 315       LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
 316       continue;
 317     }
 318
 319     // Iterate through taking the hash of each block of size (block_size_) of
 320     // the file.
 321     std::vector<std::string> hashes;
 322     size_t offset = 0;
 323     while (offset < contents.size()) {
 324       if (IsCancelled())
 325         return false;
 326       const char* block_start = contents.data() + offset;
 327       size_t bytes_to_read =
 328           std::min(contents.size() - offset, static_cast<size_t>(block_size_));
 329       DCHECK(bytes_to_read > 0);
 330       scoped_ptr<crypto::SecureHash> hash(
 331           crypto::SecureHash::Create(crypto::SecureHash::SHA256));
 332       hash->Update(block_start, bytes_to_read);
 333
 334       hashes.push_back(std::string());
 335       std::string* buffer = &hashes.back();
 336       buffer->resize(crypto::kSHA256Length);
 337       hash->Finish(string_as_array(buffer), buffer->size());
 338
 339       // Get ready for next iteration.
 340       offset += bytes_to_read;
 341     }
 342     writer.AddHashes(relative_path, block_size_, hashes);
 343   }
 344   return writer.WriteToFile(hashes_file);
 345 }
 346
 347 void ContentHashFetcherJob::DispatchCallback() {
 348   {
 349     base::AutoLock autolock(cancelled_lock_);
 350     if (cancelled_)
 351       return;
 352   }
 353   callback_.Run(this);
 354 }
 355
 356 // ----
 357
 358 ContentHashFetcher::ContentHashFetcher(content::BrowserContext* context,
 359                                        ContentVerifierDelegate* delegate)
 360     : context_(context),
 361       delegate_(delegate),
 362       observer_(this),
 363       weak_ptr_factory_(this) {
 364 }
 365
 366 ContentHashFetcher::~ContentHashFetcher() {
 367   for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
 368     i->second->Cancel();
 369   }
 370 }
 371
 372 void ContentHashFetcher::Start() {
 373   ExtensionRegistry* registry = ExtensionRegistry::Get(context_);
 374   observer_.Add(registry);
 375 }
 376
 377 void ContentHashFetcher::DoFetch(const Extension* extension) {
 378   if (!extension || !delegate_->ShouldBeVerified(*extension))
 379     return;
 380
 381   IdAndVersion key(extension->id(), extension->version()->GetString());
 382   if (ContainsKey(jobs_, key))
 383     return;
 384
 385   // TODO(asargent) - we should do something here to remember recent attempts
 386   // to fetch signatures by extension id, and use exponential backoff to avoid
 387   // hammering the server when we aren't successful in getting them.
 388   // crbug.com/373397
 389
 390   DCHECK(extension->version());
 391   GURL url =
 392       delegate_->GetSignatureFetchUrl(extension->id(), *extension->version());
 393   ContentHashFetcherJob* job =
 394       new ContentHashFetcherJob(context_->GetRequestContext(),
 395                                 extension->id(),
 396                                 extension->path(),
 397                                 url,
 398                                 base::Bind(&ContentHashFetcher::JobFinished,
 399                                            weak_ptr_factory_.GetWeakPtr()));
 400   jobs_.insert(std::make_pair(key, job));
 401   job->Start();
 402 }
 403
 404 void ContentHashFetcher::OnExtensionLoaded(
 405     content::BrowserContext* browser_context,
 406     const Extension* extension) {
 407   CHECK(extension);
 408   DoFetch(extension);
 409 }
 410
 411 void ContentHashFetcher::OnExtensionUnloaded(
 412     content::BrowserContext* browser_context,
 413     const Extension* extension,
 414     UnloadedExtensionInfo::Reason reason) {
 415   CHECK(extension);
 416   IdAndVersion key(extension->id(), extension->version()->GetString());
 417   JobMap::iterator found = jobs_.find(key);
 418   if (found != jobs_.end())
 419     jobs_.erase(found);
 420 }
 421
 422 void ContentHashFetcher::JobFinished(ContentHashFetcherJob* job) {
 423   for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
 424     if (i->second.get() == job) {
 425       jobs_.erase(i);
 426       break;
 427     }
 428   }
 429 }
 430
 431 }  // namespace extensions