extensions/browser/content_hash_fetcher.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "extensions/browser/content_hash_fetcher.h"
   6
   7 #include <algorithm>
   8
   9 #include "base/base64.h"
  10 #include "base/files/file_enumerator.h"
  11 #include "base/files/file_util.h"
  12 #include "base/json/json_reader.h"
  13 #include "base/memory/ref_counted.h"
  14 #include "base/metrics/histogram.h"
  15 #include "base/synchronization/lock.h"
  16 #include "base/task_runner_util.h"
  17 #include "base/timer/elapsed_timer.h"
  18 #include "base/version.h"
  19 #include "content/public/browser/browser_context.h"
  20 #include "content/public/browser/browser_thread.h"
  21 #include "crypto/sha2.h"
  22 #include "extensions/browser/computed_hashes.h"
  23 #include "extensions/browser/content_hash_tree.h"
  24 #include "extensions/browser/content_verifier_delegate.h"
  25 #include "extensions/browser/verified_contents.h"
  26 #include "extensions/common/constants.h"
  27 #include "extensions/common/extension.h"
  28 #include "extensions/common/file_util.h"
  29 #include "net/base/load_flags.h"
  30 #include "net/url_request/url_fetcher.h"
  31 #include "net/url_request/url_fetcher_delegate.h"
  32 #include "net/url_request/url_request_status.h"
  33
namespace {

// A std::set of file paths. CreateHashes() first collects every enumerated
// file into one of these and then walks it, so that files are processed in
// the set's sorted iteration order.
typedef std::set<base::FilePath> SortedFilePathSet;

}  // namespace
  39
  40 namespace extensions {
  41
  42 // This class takes care of doing the disk and network I/O work to ensure we
  43 // have both verified_contents.json files from the webstore and
  44 // computed_hashes.json files computed over the files in an extension's
  45 // directory.
  46 class ContentHashFetcherJob
  47     : public base::RefCountedThreadSafe<ContentHashFetcherJob>,
  48       public net::URLFetcherDelegate {
  49  public:
  50   typedef base::Callback<void(ContentHashFetcherJob*)> CompletionCallback;
  51   ContentHashFetcherJob(net::URLRequestContextGetter* request_context,
  52                         const ContentVerifierKey& key,
  53                         const std::string& extension_id,
  54                         const base::FilePath& extension_path,
  55                         const GURL& fetch_url,
  56                         bool force,
  57                         const CompletionCallback& callback);
  58
  59   void Start();
  60
  61   // Cancels this job, which will attempt to stop I/O operations sooner than
  62   // just waiting for the entire job to complete. Safe to call from any thread.
  63   void Cancel();
  64
  65   // Checks whether this job has been cancelled. Safe to call from any thread.
  66   bool IsCancelled();
  67
  68   // Returns whether this job was successful (we have both verified contents
  69   // and computed hashes). Even if the job was a success, there might have been
  70   // files that were found to have contents not matching expectations; these
  71   // are available by calling hash_mismatch_paths().
  72   bool success() { return success_; }
  73
  74   bool force() { return force_; }
  75
  76   const std::string& extension_id() { return extension_id_; }
  77
  78   // Returns the set of paths that had a hash mismatch.
  79   const std::set<base::FilePath>& hash_mismatch_paths() {
  80     return hash_mismatch_paths_;
  81   }
  82
  83  private:
  84   friend class base::RefCountedThreadSafe<ContentHashFetcherJob>;
  85   virtual ~ContentHashFetcherJob();
  86
  87   // Tries to load a verified_contents.json file at |path|. On successfully
  88   // reading and validing the file, the verified_contents_ member variable will
  89   // be set and this function will return true. If the file does not exist, or
  90   // exists but is invalid, it will return false. Also, any invalid
  91   // file will be removed from disk and
  92   bool LoadVerifiedContents(const base::FilePath& path);
  93
  94   // Callback for when we're done doing file I/O to see if we already have
  95   // a verified contents file. If we don't, this will kick off a network
  96   // request to get one.
  97   void DoneCheckingForVerifiedContents(bool found);
  98
  99   // URLFetcherDelegate interface
 100   virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;
 101
 102   // Callback for when we're done ensuring we have verified contents, and are
 103   // ready to move on to MaybeCreateHashes.
 104   void DoneFetchingVerifiedContents(bool success);
 105
 106   // Callback for the job to write the verified contents to the filesystem.
 107   void OnVerifiedContentsWritten(size_t expected_size, int write_result);
 108
 109   // The verified contents file from the webstore only contains the treehash
 110   // root hash, but for performance we want to cache the individual block level
 111   // hashes. This function will create that cache with block-level hashes for
 112   // each file in the extension if needed (the treehash root hash for each of
 113   // these should equal what is in the verified contents file from the
 114   // webstore).
 115   void MaybeCreateHashes();
 116
 117   // Computes hashes for all files in |extension_path_|, and uses a
 118   // ComputedHashes::Writer to write that information into
 119   // |hashes_file|. Returns true on success.
 120   bool CreateHashes(const base::FilePath& hashes_file);
 121
 122   // Will call the callback, if we haven't been cancelled.
 123   void DispatchCallback();
 124
 125   net::URLRequestContextGetter* request_context_;
 126   std::string extension_id_;
 127   base::FilePath extension_path_;
 128
 129   // The url we'll need to use to fetch a verified_contents.json file.
 130   GURL fetch_url_;
 131
 132   bool force_;
 133
 134   CompletionCallback callback_;
 135   content::BrowserThread::ID creation_thread_;
 136
 137   // Used for fetching content signatures.
 138   scoped_ptr<net::URLFetcher> url_fetcher_;
 139
 140   // The key used to validate verified_contents.json.
 141   ContentVerifierKey key_;
 142
 143   // The parsed contents of the verified_contents.json file, either read from
 144   // disk or fetched from the network and then written to disk.
 145   scoped_ptr<VerifiedContents> verified_contents_;
 146
 147   // Whether this job succeeded.
 148   bool success_;
 149
 150   // Paths that were found to have a mismatching hash.
 151   std::set<base::FilePath> hash_mismatch_paths_;
 152
 153   // The block size to use for hashing.
 154   int block_size_;
 155
 156   // Note: this may be accessed from multiple threads, so all access should
 157   // be protected by |cancelled_lock_|.
 158   bool cancelled_;
 159
 160   // A lock for synchronizing access to |cancelled_|.
 161   base::Lock cancelled_lock_;
 162
 163   DISALLOW_COPY_AND_ASSIGN(ContentHashFetcherJob);
 164 };
 165
 166 ContentHashFetcherJob::ContentHashFetcherJob(
 167     net::URLRequestContextGetter* request_context,
 168     const ContentVerifierKey& key,
 169     const std::string& extension_id,
 170     const base::FilePath& extension_path,
 171     const GURL& fetch_url,
 172     bool force,
 173     const CompletionCallback& callback)
 174     : request_context_(request_context),
 175       extension_id_(extension_id),
 176       extension_path_(extension_path),
 177       fetch_url_(fetch_url),
 178       force_(force),
 179       callback_(callback),
 180       key_(key),
 181       success_(false),
 182       // TODO(asargent) - use the value from verified_contents.json for each
 183       // file, instead of using a constant.
 184       block_size_(4096),
 185       cancelled_(false) {
 186   bool got_id =
 187       content::BrowserThread::GetCurrentThreadIdentifier(&creation_thread_);
 188   DCHECK(got_id);
 189 }
 190
 191 void ContentHashFetcherJob::Start() {
 192   base::FilePath verified_contents_path =
 193       file_util::GetVerifiedContentsPath(extension_path_);
 194   base::PostTaskAndReplyWithResult(
 195       content::BrowserThread::GetBlockingPool(),
 196       FROM_HERE,
 197       base::Bind(&ContentHashFetcherJob::LoadVerifiedContents,
 198                  this,
 199                  verified_contents_path),
 200       base::Bind(&ContentHashFetcherJob::DoneCheckingForVerifiedContents,
 201                  this));
 202 }
 203
 204 void ContentHashFetcherJob::Cancel() {
 205   base::AutoLock autolock(cancelled_lock_);
 206   cancelled_ = true;
 207 }
 208
 209 bool ContentHashFetcherJob::IsCancelled() {
 210   base::AutoLock autolock(cancelled_lock_);
 211   bool result = cancelled_;
 212   return result;
 213 }
 214
 215 ContentHashFetcherJob::~ContentHashFetcherJob() {
 216 }
 217
 218 bool ContentHashFetcherJob::LoadVerifiedContents(const base::FilePath& path) {
 219   if (!base::PathExists(path))
 220     return false;
 221   verified_contents_.reset(new VerifiedContents(key_.data, key_.size));
 222   if (!verified_contents_->InitFrom(path, false)) {
 223     verified_contents_.reset();
 224     if (!base::DeleteFile(path, false))
 225       LOG(WARNING) << "Failed to delete " << path.value();
 226     return false;
 227   }
 228   return true;
 229 }
 230
 231 void ContentHashFetcherJob::DoneCheckingForVerifiedContents(bool found) {
 232   if (IsCancelled())
 233     return;
 234   if (found) {
 235     VLOG(1) << "Found verified contents for " << extension_id_;
 236     DoneFetchingVerifiedContents(true);
 237   } else {
 238     VLOG(1) << "Missing verified contents for " << extension_id_
 239             << ", fetching...";
 240     url_fetcher_.reset(
 241         net::URLFetcher::Create(fetch_url_, net::URLFetcher::GET, this));
 242     url_fetcher_->SetRequestContext(request_context_);
 243     url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
 244                                net::LOAD_DO_NOT_SAVE_COOKIES |
 245                                net::LOAD_DISABLE_CACHE);
 246     url_fetcher_->SetAutomaticallyRetryOnNetworkChanges(3);
 247     url_fetcher_->Start();
 248   }
 249 }
 250
 251 // Helper function to let us pass ownership of a string via base::Bind with the
 252 // contents to be written into a file. Also ensures that the directory for
 253 // |path| exists, creating it if needed.
 254 static int WriteFileHelper(const base::FilePath& path,
 255                            scoped_ptr<std::string> content) {
 256   base::FilePath dir = path.DirName();
 257   return (base::CreateDirectoryAndGetError(dir, NULL) &&
 258           base::WriteFile(path, content->data(), content->size()));
 259 }
 260
 261 void ContentHashFetcherJob::OnURLFetchComplete(const net::URLFetcher* source) {
 262   VLOG(1) << "URLFetchComplete for " << extension_id_
 263           << " is_success:" << url_fetcher_->GetStatus().is_success() << " "
 264           << fetch_url_.possibly_invalid_spec();
 265   if (IsCancelled())
 266     return;
 267   scoped_ptr<std::string> response(new std::string);
 268   if (!url_fetcher_->GetStatus().is_success() ||
 269       !url_fetcher_->GetResponseAsString(response.get())) {
 270     DoneFetchingVerifiedContents(false);
 271     return;
 272   }
 273
 274   // Parse the response to make sure it is valid json (on staging sometimes it
 275   // can be a login redirect html, xml file, etc. if you aren't logged in with
 276   // the right cookies).  TODO(asargent) - It would be a nice enhancement to
 277   // move to parsing this in a sandboxed helper (crbug.com/372878).
 278   scoped_ptr<base::Value> parsed(base::JSONReader::Read(*response));
 279   if (parsed) {
 280     VLOG(1) << "JSON parsed ok for " << extension_id_;
 281
 282     parsed.reset();  // no longer needed
 283     base::FilePath destination =
 284         file_util::GetVerifiedContentsPath(extension_path_);
 285     size_t size = response->size();
 286     base::PostTaskAndReplyWithResult(
 287         content::BrowserThread::GetBlockingPool(),
 288         FROM_HERE,
 289         base::Bind(&WriteFileHelper, destination, base::Passed(&response)),
 290         base::Bind(
 291             &ContentHashFetcherJob::OnVerifiedContentsWritten, this, size));
 292   } else {
 293     DoneFetchingVerifiedContents(false);
 294   }
 295 }
 296
 297 void ContentHashFetcherJob::OnVerifiedContentsWritten(size_t expected_size,
 298                                                       int write_result) {
 299   bool success =
 300       (write_result >= 0 && static_cast<size_t>(write_result) == expected_size);
 301   DoneFetchingVerifiedContents(success);
 302 }
 303
 304 void ContentHashFetcherJob::DoneFetchingVerifiedContents(bool success) {
 305   if (IsCancelled())
 306     return;
 307
 308   if (!success) {
 309     DispatchCallback();
 310     return;
 311   }
 312
 313   content::BrowserThread::PostBlockingPoolSequencedTask(
 314       "ContentHashFetcher",
 315       FROM_HERE,
 316       base::Bind(&ContentHashFetcherJob::MaybeCreateHashes, this));
 317 }
 318
 319 void ContentHashFetcherJob::MaybeCreateHashes() {
 320   if (IsCancelled())
 321     return;
 322   base::FilePath hashes_file =
 323       file_util::GetComputedHashesPath(extension_path_);
 324
 325   if (!force_ && base::PathExists(hashes_file)) {
 326     success_ = true;
 327   } else {
 328     if (force_)
 329       base::DeleteFile(hashes_file, false /* recursive */);
 330     success_ = CreateHashes(hashes_file);
 331   }
 332
 333   content::BrowserThread::PostTask(
 334       creation_thread_,
 335       FROM_HERE,
 336       base::Bind(&ContentHashFetcherJob::DispatchCallback, this));
 337 }
 338
 339 bool ContentHashFetcherJob::CreateHashes(const base::FilePath& hashes_file) {
 340   base::ElapsedTimer timer;
 341   if (IsCancelled())
 342     return false;
 343   // Make sure the directory exists.
 344   if (!base::CreateDirectoryAndGetError(hashes_file.DirName(), NULL))
 345     return false;
 346
 347   if (!verified_contents_.get()) {
 348     base::FilePath verified_contents_path =
 349         file_util::GetVerifiedContentsPath(extension_path_);
 350     verified_contents_.reset(new VerifiedContents(key_.data, key_.size));
 351     if (!verified_contents_->InitFrom(verified_contents_path, false))
 352       return false;
 353     verified_contents_.reset();
 354   }
 355
 356   base::FileEnumerator enumerator(extension_path_,
 357                                   true, /* recursive */
 358                                   base::FileEnumerator::FILES);
 359   // First discover all the file paths and put them in a sorted set.
 360   SortedFilePathSet paths;
 361   for (;;) {
 362     if (IsCancelled())
 363       return false;
 364
 365     base::FilePath full_path = enumerator.Next();
 366     if (full_path.empty())
 367       break;
 368     paths.insert(full_path);
 369   }
 370
 371   // Now iterate over all the paths in sorted order and compute the block hashes
 372   // for each one.
 373   ComputedHashes::Writer writer;
 374   for (SortedFilePathSet::iterator i = paths.begin(); i != paths.end(); ++i) {
 375     if (IsCancelled())
 376       return false;
 377     const base::FilePath& full_path = *i;
 378     base::FilePath relative_path;
 379     extension_path_.AppendRelativePath(full_path, &relative_path);
 380     relative_path = relative_path.NormalizePathSeparatorsTo('/');
 381
 382     const std::string* expected_root =
 383         verified_contents_->GetTreeHashRoot(relative_path);
 384     if (!expected_root)
 385       continue;
 386
 387     std::string contents;
 388     if (!base::ReadFileToString(full_path, &contents)) {
 389       LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
 390       continue;
 391     }
 392
 393     // Iterate through taking the hash of each block of size (block_size_) of
 394     // the file.
 395     std::vector<std::string> hashes;
 396     ComputedHashes::ComputeHashesForContent(contents, block_size_, &hashes);
 397     std::string root =
 398         ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length);
 399     if (expected_root && *expected_root != root) {
 400       VLOG(1) << "content mismatch for " << relative_path.AsUTF8Unsafe();
 401       hash_mismatch_paths_.insert(relative_path);
 402       continue;
 403     }
 404
 405     writer.AddHashes(relative_path, block_size_, hashes);
 406   }
 407   bool result = writer.WriteToFile(hashes_file);
 408   UMA_HISTOGRAM_TIMES("ExtensionContentHashFetcher.CreateHashesTime",
 409                       timer.Elapsed());
 410   return result;
 411 }
 412
 413 void ContentHashFetcherJob::DispatchCallback() {
 414   {
 415     base::AutoLock autolock(cancelled_lock_);
 416     if (cancelled_)
 417       return;
 418   }
 419   callback_.Run(this);
 420 }
 421
 422 // ----
 423
 424 ContentHashFetcher::ContentHashFetcher(content::BrowserContext* context,
 425                                        ContentVerifierDelegate* delegate,
 426                                        const FetchCallback& callback)
 427     : context_(context),
 428       delegate_(delegate),
 429       fetch_callback_(callback),
 430       weak_ptr_factory_(this) {
 431 }
 432
 433 ContentHashFetcher::~ContentHashFetcher() {
 434   for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
 435     i->second->Cancel();
 436   }
 437 }
 438
 439 void ContentHashFetcher::DoFetch(const Extension* extension, bool force) {
 440   DCHECK(extension);
 441
 442   IdAndVersion key(extension->id(), extension->version()->GetString());
 443   JobMap::iterator found = jobs_.find(key);
 444   if (found != jobs_.end()) {
 445     if (!force || found->second->force()) {
 446       // Just let the existing job keep running.
 447       return;
 448     } else {
 449       // Kill the existing non-force job, so we can start a new one below.
 450       found->second->Cancel();
 451       jobs_.erase(found);
 452     }
 453   }
 454
 455   // TODO(asargent) - we should do something here to remember recent attempts
 456   // to fetch signatures by extension id, and use exponential backoff to avoid
 457   // hammering the server when we aren't successful in getting them.
 458   // crbug.com/373397
 459
 460   DCHECK(extension->version());
 461   GURL url =
 462       delegate_->GetSignatureFetchUrl(extension->id(), *extension->version());
 463   ContentHashFetcherJob* job =
 464       new ContentHashFetcherJob(context_->GetRequestContext(),
 465                                 delegate_->PublicKey(),
 466                                 extension->id(),
 467                                 extension->path(),
 468                                 url,
 469                                 force,
 470                                 base::Bind(&ContentHashFetcher::JobFinished,
 471                                            weak_ptr_factory_.GetWeakPtr()));
 472   jobs_.insert(std::make_pair(key, job));
 473   job->Start();
 474 }
 475
 476 void ContentHashFetcher::ExtensionLoaded(const Extension* extension) {
 477   CHECK(extension);
 478   DoFetch(extension, false);
 479 }
 480
 481 void ContentHashFetcher::ExtensionUnloaded(const Extension* extension) {
 482   CHECK(extension);
 483   IdAndVersion key(extension->id(), extension->version()->GetString());
 484   JobMap::iterator found = jobs_.find(key);
 485   if (found != jobs_.end()) {
 486     found->second->Cancel();
 487     jobs_.erase(found);
 488   }
 489 }
 490
 491 void ContentHashFetcher::JobFinished(ContentHashFetcherJob* job) {
 492   if (!job->IsCancelled()) {
 493     fetch_callback_.Run(job->extension_id(),
 494                         job->success(),
 495                         job->force(),
 496                         job->hash_mismatch_paths());
 497   }
 498
 499   for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
 500     if (i->second.get() == job) {
 501       jobs_.erase(i);
 502       break;
 503     }
 504   }
 505 }
 506
 507 }  // namespace extensions