chrome/browser/safe_browsing/safe_browsing_store_file.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
   6
   7 #include "base/file_util.h"
   8 #include "base/files/scoped_file.h"
   9 #include "base/md5.h"
  10 #include "base/metrics/histogram.h"
  11 #include "base/metrics/sparse_histogram.h"
  12
  13 namespace {
  14
  15 // NOTE(shess): kFileMagic should not be a byte-wise palindrome, so
  16 // that byte-order changes force corruption.
  17 const int32 kFileMagic = 0x600D71FE;
  18
  19 // Version history:
  20 // Version 6: aad08754/r2814 by erikkay@google.com on 2008-10-02 (sqlite)
  21 // Version 7: 6afe28a5/r37435 by shess@chromium.org on 2010-01-28
  22 // Version 8: d3dd0715/r259791 by shess@chromium.org on 2014-03-27
  23 const int32 kFileVersion = 8;
  24
  25 // ReadAndVerifyHeader() returns this in case of error.
  26 const int32 kInvalidVersion = -1;
  27
  28 // Header at the front of the main database file.
  29 struct FileHeaderV7 {
  30   int32 magic, version;
  31   uint32 add_chunk_count, sub_chunk_count;
  32   uint32 add_prefix_count, sub_prefix_count;
  33   uint32 add_hash_count, sub_hash_count;
  34 };
  35
  36 // Starting with version 8, the storage is sorted and can be sharded to allow
  37 // updates to be done with lower memory requirements.  Newly written files will
  38 // be sharded to need less than this amount of memory during update.  Larger
  39 // values are preferred to minimize looping overhead during processing.
  40 const int64 kUpdateStorageBytes = 100 * 1024;
  41
  42 // Prevent excessive sharding by setting a lower limit on the shard stride.
  43 // Smaller values should work fine, but very small values will probably lead to
  44 // poor performance.  Shard stride is indirectly related to
  45 // |kUpdateStorageBytes|, setting that very small will bump against this.
  46 const uint32 kMinShardStride = 1 << 24;
  47
  48 // Strides over the entire SBPrefix space.
  49 const uint64 kMaxShardStride = 1ULL << 32;
  50
  51 // Maximum SBPrefix value.
  52 const SBPrefix kMaxSBPrefix = ~0;
  53
  54 // Header at the front of the main database file.
  55 struct FileHeaderV8 {
  56   int32 magic, version;
  57   uint32 add_chunk_count, sub_chunk_count;
  58   uint32 shard_stride;
  59   // TODO(shess): Is this where 64-bit will bite me?  Perhaps write a
  60   // specialized read/write?
  61 };
  62
  63 union FileHeader {
  64   struct FileHeaderV7 v7;
  65   struct FileHeaderV8 v8;
  66 };
  67
  68 // Header for each chunk in the chunk-accumulation file.
  69 struct ChunkHeader {
  70   uint32 add_prefix_count, sub_prefix_count;
  71   uint32 add_hash_count, sub_hash_count;
  72 };
  73
  74 // Header for each shard of data in the main database file.
  75 struct ShardHeader {
  76   uint32 add_prefix_count, sub_prefix_count;
  77   uint32 add_hash_count, sub_hash_count;
  78 };
  79
  80 // Enumerate different format-change events for histogramming
  81 // purposes.  DO NOT CHANGE THE ORDERING OF THESE VALUES.
  82 enum FormatEventType {
  83   // Corruption detected, broken down by file format.
  84   FORMAT_EVENT_FILE_CORRUPT,
  85   FORMAT_EVENT_SQLITE_CORRUPT,  // Obsolete
  86
  87   // The type of format found in the file.  The expected case (new
  88   // file format) is intentionally not covered.
  89   FORMAT_EVENT_FOUND_SQLITE,
  90   FORMAT_EVENT_FOUND_UNKNOWN,
  91
  92   // The number of SQLite-format files deleted should be the same as
  93   // FORMAT_EVENT_FOUND_SQLITE.  It can differ if the delete fails,
  94   // or if a failure prevents the update from succeeding.
  95   FORMAT_EVENT_SQLITE_DELETED,  // Obsolete
  96   FORMAT_EVENT_SQLITE_DELETE_FAILED,  // Obsolete
  97
  98   // Found and deleted (or failed to delete) the ancient "Safe
  99   // Browsing" file.
 100   FORMAT_EVENT_DELETED_ORIGINAL,
 101   FORMAT_EVENT_DELETED_ORIGINAL_FAILED,
 102
 103   // The checksum did not check out in CheckValidity() or in
 104   // FinishUpdate().  This most likely indicates that the machine
 105   // crashed before the file was fully sync'ed to disk.
 106   FORMAT_EVENT_VALIDITY_CHECKSUM_FAILURE,
 107   FORMAT_EVENT_UPDATE_CHECKSUM_FAILURE,
 108
 109   // The header checksum was incorrect in ReadAndVerifyHeader().  Likely
 110   // indicates that the system crashed while writing an update.
 111   FORMAT_EVENT_HEADER_CHECKSUM_FAILURE,
 112
 113   // Memory space for histograms is determined by the max.  ALWAYS
 114   // ADD NEW VALUES BEFORE THIS ONE.
 115   FORMAT_EVENT_MAX
 116 };
 117
 118 void RecordFormatEvent(FormatEventType event_type) {
 119   UMA_HISTOGRAM_ENUMERATION("SB2.FormatEvent", event_type, FORMAT_EVENT_MAX);
 120 }
 121
 122 // Rewind the file.  Using fseek(2) because rewind(3) errors are
 123 // weird.
 124 bool FileRewind(FILE* fp) {
 125   int rv = fseek(fp, 0, SEEK_SET);
 126   DCHECK_EQ(rv, 0);
 127   return rv == 0;
 128 }
 129
 130 // Read from |fp| into |item|, and fold the input data into the
 131 // checksum in |context|, if non-NULL.  Return true on success.
 132 template <class T>
 133 bool ReadItem(T* item, FILE* fp, base::MD5Context* context) {
 134   const size_t ret = fread(item, sizeof(T), 1, fp);
 135   if (ret != 1)
 136     return false;
 137
 138   if (context) {
 139     base::MD5Update(context,
 140                     base::StringPiece(reinterpret_cast<char*>(item),
 141                                       sizeof(T)));
 142   }
 143   return true;
 144 }
 145
 146 // Write |item| to |fp|, and fold the output data into the checksum in
 147 // |context|, if non-NULL.  Return true on success.
 148 template <class T>
 149 bool WriteItem(const T& item, FILE* fp, base::MD5Context* context) {
 150   const size_t ret = fwrite(&item, sizeof(T), 1, fp);
 151   if (ret != 1)
 152     return false;
 153
 154   if (context) {
 155     base::MD5Update(context,
 156                     base::StringPiece(reinterpret_cast<const char*>(&item),
 157                                       sizeof(T)));
 158   }
 159
 160   return true;
 161 }
 162
 163 // Read |count| items into |values| from |fp|, and fold them into the
 164 // checksum in |context|.  Returns true on success.
 165 template <typename CT>
 166 bool ReadToContainer(CT* values, size_t count, FILE* fp,
 167                      base::MD5Context* context) {
 168   if (!count)
 169     return true;
 170
 171   for (size_t i = 0; i < count; ++i) {
 172     typename CT::value_type value;
 173     if (!ReadItem(&value, fp, context))
 174       return false;
 175
 176     // push_back() is more obvious, but coded this way std::set can
 177     // also be read.
 178     values->insert(values->end(), value);
 179   }
 180
 181   return true;
 182 }
 183
 184 // Write values between |beg| and |end| to |fp|, and fold the data into the
 185 // checksum in |context|, if non-NULL.  Returns true if all items successful.
 186 template <typename CTI>
 187 bool WriteRange(const CTI& beg, const CTI& end,
 188                 FILE* fp, base::MD5Context* context) {
 189   for (CTI iter = beg; iter != end; ++iter) {
 190     if (!WriteItem(*iter, fp, context))
 191       return false;
 192   }
 193   return true;
 194 }
 195
 196 // Write all of |values| to |fp|, and fold the data into the checksum
 197 // in |context|, if non-NULL.  Returns true if all items successful.
 198 template <typename CT>
 199 bool WriteContainer(const CT& values, FILE* fp,
 200                     base::MD5Context* context) {
 201   return WriteRange(values.begin(), values.end(), fp, context);
 202 }
 203
 204 // Delete the chunks in |deleted| from |chunks|.
 205 void DeleteChunksFromSet(const base::hash_set<int32>& deleted,
 206                          std::set<int32>* chunks) {
 207   for (std::set<int32>::iterator iter = chunks->begin();
 208        iter != chunks->end();) {
 209     std::set<int32>::iterator prev = iter++;
 210     if (deleted.count(*prev) > 0)
 211       chunks->erase(prev);
 212   }
 213 }
 214
 215 bool ReadAndVerifyChecksum(FILE* fp, base::MD5Context* context) {
 216   base::MD5Digest calculated_digest;
 217   base::MD5IntermediateFinal(&calculated_digest, context);
 218
 219   base::MD5Digest file_digest;
 220   if (!ReadItem(&file_digest, fp, context))
 221     return false;
 222
 223   return memcmp(&file_digest, &calculated_digest, sizeof(file_digest)) == 0;
 224 }
 225
 226 // Sanity-check the header against the file's size to make sure our
 227 // vectors aren't gigantic.  This doubles as a cheap way to detect
 228 // corruption without having to checksum the entire file.
 229 bool FileHeaderV7SanityCheck(const base::FilePath& filename,
 230                              const FileHeaderV7& header) {
 231   DCHECK_EQ(header.version, 7);
 232
 233   int64 size = 0;
 234   if (!base::GetFileSize(filename, &size))
 235     return false;
 236
 237   int64 expected_size = sizeof(FileHeaderV7);
 238   expected_size += header.add_chunk_count * sizeof(int32);
 239   expected_size += header.sub_chunk_count * sizeof(int32);
 240   expected_size += header.add_prefix_count * sizeof(SBAddPrefix);
 241   expected_size += header.sub_prefix_count * sizeof(SBSubPrefix);
 242   expected_size += header.add_hash_count * sizeof(SBAddFullHash);
 243   expected_size += header.sub_hash_count * sizeof(SBSubFullHash);
 244   expected_size += sizeof(base::MD5Digest);
 245   if (size != expected_size)
 246     return false;
 247
 248   return true;
 249 }
 250
 251 // Helper function to read the file header and chunk TOC.  Rewinds |fp| and
 252 // initializes |context|.  The header is left in |header|, with the version
 253 // returned.  kInvalidVersion is returned for sanity check or checksum failure.
 254 int ReadAndVerifyHeader(const base::FilePath& filename,
 255                         FileHeader* header,
 256                         std::set<int32>* add_chunks,
 257                         std::set<int32>* sub_chunks,
 258                         FILE* fp,
 259                         base::MD5Context* context) {
 260   DCHECK(header);
 261   DCHECK(add_chunks);
 262   DCHECK(sub_chunks);
 263   DCHECK(fp);
 264   DCHECK(context);
 265
 266   int version = kInvalidVersion;
 267
 268   base::MD5Init(context);
 269   if (!FileRewind(fp))
 270     return kInvalidVersion;
 271   if (!ReadItem(&header->v8, fp, context))
 272     return kInvalidVersion;
 273   if (header->v8.magic != kFileMagic)
 274     return kInvalidVersion;
 275
 276   size_t add_chunks_count = 0;
 277   size_t sub_chunks_count = 0;
 278
 279   // Track version read to inform removal of support for older versions.
 280   UMA_HISTOGRAM_SPARSE_SLOWLY("SB2.StoreVersionRead", header->v8.version);
 281
 282   if (header->v8.version == 7) {
 283     version = 7;
 284
 285     // Reset the context and re-read the v7 header.
 286     base::MD5Init(context);
 287     if (!FileRewind(fp))
 288       return kInvalidVersion;
 289     if (!ReadItem(&header->v7, fp, context))
 290       return kInvalidVersion;
 291     if (header->v7.magic != kFileMagic || header->v7.version != 7)
 292       return kInvalidVersion;
 293     if (!FileHeaderV7SanityCheck(filename, header->v7))
 294       return kInvalidVersion;
 295
 296     add_chunks_count = header->v7.add_chunk_count;
 297     sub_chunks_count = header->v7.sub_chunk_count;
 298   } else if (header->v8.version == kFileVersion) {
 299     version = 8;
 300     add_chunks_count = header->v8.add_chunk_count;
 301     sub_chunks_count = header->v8.sub_chunk_count;
 302   } else {
 303     return kInvalidVersion;
 304   }
 305
 306   if (!ReadToContainer(add_chunks, add_chunks_count, fp, context) ||
 307       !ReadToContainer(sub_chunks, sub_chunks_count, fp, context)) {
 308     return kInvalidVersion;
 309   }
 310
 311   // v8 includes a checksum to validate the header.
 312   if (version > 7 && !ReadAndVerifyChecksum(fp, context)) {
 313     RecordFormatEvent(FORMAT_EVENT_HEADER_CHECKSUM_FAILURE);
 314     return kInvalidVersion;
 315   }
 316
 317   return version;
 318 }
 319
 320 // Helper function to write out the initial header and chunks-contained data.
 321 // Rewinds |fp|, initializes |context|, then writes a file header and
 322 // |add_chunks| and |sub_chunks|.
 323 bool WriteHeader(uint32 out_stride,
 324                  const std::set<int32>& add_chunks,
 325                  const std::set<int32>& sub_chunks,
 326                  FILE* fp,
 327                  base::MD5Context* context) {
 328   if (!FileRewind(fp))
 329     return false;
 330
 331   base::MD5Init(context);
 332   FileHeaderV8 header;
 333   header.magic = kFileMagic;
 334   header.version = kFileVersion;
 335   header.add_chunk_count = add_chunks.size();
 336   header.sub_chunk_count = sub_chunks.size();
 337   header.shard_stride = out_stride;
 338   if (!WriteItem(header, fp, context))
 339     return false;
 340
 341   if (!WriteContainer(add_chunks, fp, context) ||
 342       !WriteContainer(sub_chunks, fp, context))
 343     return false;
 344
 345   // Write out the header digest.
 346   base::MD5Digest header_digest;
 347   base::MD5IntermediateFinal(&header_digest, context);
 348   if (!WriteItem(header_digest, fp, context))
 349     return false;
 350
 351   return true;
 352 }
 353
 354 // Return |true| if the range is sorted by the given comparator.
 355 template <typename CTI, typename LESS>
 356 bool sorted(CTI beg, CTI end, LESS less) {
 357   while ((end - beg) > 2) {
 358     CTI n = beg++;
 359     DCHECK(!less(*beg, *n));
 360     if (less(*beg, *n))
 361       return false;
 362   }
 363   return true;
 364 }
 365
 366 // Merge |beg|..|end| into |container|.  Both should be sorted by the given
 367 // comparator, and the range iterators should not be derived from |container|.
 368 // Differs from std::inplace_merge() in that additional memory is not required
 369 // for linear performance.
 370 template <typename CT, typename CTI, typename COMP>
 371 void container_merge(CT* container, CTI beg, CTI end, const COMP& less) {
 372   DCHECK(sorted(container->begin(), container->end(), less));
 373   DCHECK(sorted(beg, end, less));
 374
 375   // Size the container to fit the results.
 376   const size_t c_size = container->size();
 377   container->resize(c_size + (end - beg));
 378
 379   // |c_end| points to the original endpoint, while |c_out| points to the
 380   // endpoint that will scan from end to beginning while merging.
 381   typename CT::iterator c_end = container->begin() + c_size;
 382   typename CT::iterator c_out = container->end();
 383
 384   // While both inputs have data, move the greater to |c_out|.
 385   while (c_end != container->begin() && end != beg) {
 386     if (less(*(c_end - 1), *(end - 1))) {
 387       *(--c_out) = *(--end);
 388     } else {
 389       *(--c_out) = *(--c_end);
 390     }
 391   }
 392
 393   // Copy any data remaining in the new range.
 394   if (end != beg) {
 395     // The original container data has been fully shifted.
 396     DCHECK(c_end == container->begin());
 397
 398     // There is exactly the correct amount of space left.
 399     DCHECK_EQ(c_out - c_end, end - beg);
 400
 401     std::copy(beg, end, container->begin());
 402   }
 403
 404   DCHECK(sorted(container->begin(), container->end(), less));
 405 }
 406
 407 // Collection of iterators used while stepping through StateInternal (see
 408 // below).
 409 class StateInternalPos {
 410  public:
 411   StateInternalPos(SBAddPrefixes::iterator add_prefixes_iter,
 412                    SBSubPrefixes::iterator sub_prefixes_iter,
 413                    std::vector<SBAddFullHash>::iterator add_hashes_iter,
 414                    std::vector<SBSubFullHash>::iterator sub_hashes_iter)
 415       : add_prefixes_iter_(add_prefixes_iter),
 416         sub_prefixes_iter_(sub_prefixes_iter),
 417         add_hashes_iter_(add_hashes_iter),
 418         sub_hashes_iter_(sub_hashes_iter) {
 419   }
 420
 421   SBAddPrefixes::iterator add_prefixes_iter_;
 422   SBSubPrefixes::iterator sub_prefixes_iter_;
 423   std::vector<SBAddFullHash>::iterator add_hashes_iter_;
 424   std::vector<SBSubFullHash>::iterator sub_hashes_iter_;
 425 };
 426
 427 // Helper to find the next shard boundary.
 428 template <class T>
 429 bool prefix_bounder(SBPrefix val, const T& elt) {
 430   return val < elt.GetAddPrefix();
 431 }
 432
 433 // Container for partial database state.  Includes add/sub prefixes/hashes, plus
 434 // aggregate operations on same.
 435 class StateInternal {
 436  public:
 437   // Append indicated amount of data from |fp|.
 438   bool AppendData(size_t add_prefix_count, size_t sub_prefix_count,
 439                   size_t add_hash_count, size_t sub_hash_count,
 440                   FILE* fp, base::MD5Context* context) {
 441     return
 442         ReadToContainer(&add_prefixes_, add_prefix_count, fp, context) &&
 443         ReadToContainer(&sub_prefixes_, sub_prefix_count, fp, context) &&
 444         ReadToContainer(&add_full_hashes_, add_hash_count, fp, context) &&
 445         ReadToContainer(&sub_full_hashes_, sub_hash_count, fp, context);
 446   }
 447
 448   void ClearData() {
 449     add_prefixes_.clear();
 450     sub_prefixes_.clear();
 451     add_full_hashes_.clear();
 452     sub_full_hashes_.clear();
 453   }
 454
 455   // Merge data from |beg|..|end| into receiver's state, then process the state.
 456   // The current state and the range given should corrospond to the same sorted
 457   // shard of data from different sources.  |add_del_cache| and |sub_del_cache|
 458   // indicate the chunk ids which should be deleted during processing (see
 459   // SBProcessSubs).
 460   void MergeDataAndProcess(const StateInternalPos& beg,
 461                            const StateInternalPos& end,
 462                            const base::hash_set<int32>& add_del_cache,
 463                            const base::hash_set<int32>& sub_del_cache) {
 464     container_merge(&add_prefixes_,
 465                     beg.add_prefixes_iter_,
 466                     end.add_prefixes_iter_,
 467                     SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);
 468
 469     container_merge(&sub_prefixes_,
 470                     beg.sub_prefixes_iter_,
 471                     end.sub_prefixes_iter_,
 472                     SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);
 473
 474     container_merge(&add_full_hashes_,
 475                     beg.add_hashes_iter_,
 476                     end.add_hashes_iter_,
 477                     SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>);
 478
 479     container_merge(&sub_full_hashes_,
 480                     beg.sub_hashes_iter_,
 481                     end.sub_hashes_iter_,
 482                     SBAddPrefixHashLess<SBSubFullHash, SBSubFullHash>);
 483
 484     SBProcessSubs(&add_prefixes_, &sub_prefixes_,
 485                   &add_full_hashes_, &sub_full_hashes_,
 486                   add_del_cache, sub_del_cache);
 487   }
 488
 489   // Sort the data appropriately for the sharding, merging, and processing
 490   // operations.
 491   void SortData() {
 492     std::sort(add_prefixes_.begin(), add_prefixes_.end(),
 493               SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);
 494     std::sort(sub_prefixes_.begin(), sub_prefixes_.end(),
 495               SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);
 496     std::sort(add_full_hashes_.begin(), add_full_hashes_.end(),
 497               SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>);
 498     std::sort(sub_full_hashes_.begin(), sub_full_hashes_.end(),
 499               SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>);
 500   }
 501
 502   // Iterator from the beginning of the state's data.
 503   StateInternalPos StateBegin() {
 504     return StateInternalPos(add_prefixes_.begin(),
 505                             sub_prefixes_.begin(),
 506                             add_full_hashes_.begin(),
 507                             sub_full_hashes_.begin());
 508   }
 509
 510   // An iterator pointing just after the last possible element of the shard
 511   // indicated by |shard_max|.  Used to step through the state by shard.
 512   // TODO(shess): Verify whether binary search really improves over linear.
 513   // Merging or writing will immediately touch all of these elements.
 514   StateInternalPos ShardEnd(const StateInternalPos& beg, SBPrefix shard_max) {
 515     return StateInternalPos(
 516         std::upper_bound(beg.add_prefixes_iter_, add_prefixes_.end(),
 517                          shard_max, prefix_bounder<SBAddPrefix>),
 518         std::upper_bound(beg.sub_prefixes_iter_, sub_prefixes_.end(),
 519                          shard_max, prefix_bounder<SBSubPrefix>),
 520         std::upper_bound(beg.add_hashes_iter_, add_full_hashes_.end(),
 521                          shard_max, prefix_bounder<SBAddFullHash>),
 522         std::upper_bound(beg.sub_hashes_iter_, sub_full_hashes_.end(),
 523                          shard_max, prefix_bounder<SBSubFullHash>));
 524   }
 525
 526   // Write a shard header and data for the shard starting at |beg| and ending at
 527   // the element before |end|.
 528   bool WriteShard(const StateInternalPos& beg, const StateInternalPos& end,
 529                   FILE* fp, base::MD5Context* context) {
 530     ShardHeader shard_header;
 531     shard_header.add_prefix_count =
 532         end.add_prefixes_iter_ - beg.add_prefixes_iter_;
 533     shard_header.sub_prefix_count =
 534         end.sub_prefixes_iter_ - beg.sub_prefixes_iter_;
 535     shard_header.add_hash_count =
 536         end.add_hashes_iter_ - beg.add_hashes_iter_;
 537     shard_header.sub_hash_count =
 538         end.sub_hashes_iter_ - beg.sub_hashes_iter_;
 539
 540     return
 541         WriteItem(shard_header, fp, context) &&
 542         WriteRange(beg.add_prefixes_iter_, end.add_prefixes_iter_,
 543                    fp, context) &&
 544         WriteRange(beg.sub_prefixes_iter_, end.sub_prefixes_iter_,
 545                    fp, context) &&
 546         WriteRange(beg.add_hashes_iter_, end.add_hashes_iter_,
 547                    fp, context) &&
 548         WriteRange(beg.sub_hashes_iter_, end.sub_hashes_iter_,
 549                    fp, context);
 550   }
 551
 552   SBAddPrefixes add_prefixes_;
 553   SBSubPrefixes sub_prefixes_;
 554   std::vector<SBAddFullHash> add_full_hashes_;
 555   std::vector<SBSubFullHash> sub_full_hashes_;
 556 };
 557
 558 // True if |val| is an even power of two.
 559 template <typename T>
 560 bool IsPowerOfTwo(const T& val) {
 561   return val && (val & (val - 1)) == 0;
 562 }
 563
 564 // Helper to read the entire database state, used by GetAddPrefixes() and
 565 // GetAddFullHashes().  Those functions are generally used only for smaller
 566 // files.  Returns false in case of errors reading the data.
 567 bool ReadDbStateHelper(const base::FilePath& filename,
 568                        StateInternal* db_state) {
 569   file_util::ScopedFILE file(base::OpenFile(filename, "rb"));
 570   if (file.get() == NULL)
 571     return false;
 572
 573   std::set<int32> add_chunks;
 574   std::set<int32> sub_chunks;
 575
 576   base::MD5Context context;
 577   FileHeader header;
 578   const int version =
 579       ReadAndVerifyHeader(filename, &header, &add_chunks, &sub_chunks,
 580                           file.get(), &context);
 581   if (version == kInvalidVersion)
 582     return false;
 583
 584   if (version == 7) {
 585     if (!db_state->AppendData(header.v7.add_prefix_count,
 586                               header.v7.sub_prefix_count,
 587                               header.v7.add_hash_count,
 588                               header.v7.sub_hash_count,
 589                               file.get(), &context)) {
 590       return false;
 591     }
 592
 593     // v7 data was not stored sorted.
 594     db_state->SortData();
 595   } else {
 596     // Read until the shard start overflows, always at least one pass.
 597     uint64 in_min = 0;
 598     uint64 in_stride = header.v8.shard_stride;
 599     if (!in_stride)
 600       in_stride = kMaxShardStride;
 601     if (!IsPowerOfTwo(in_stride))
 602       return false;
 603
 604     do {
 605       ShardHeader shard_header;
 606       if (!ReadItem(&shard_header, file.get(), &context))
 607         return false;
 608
 609       if (!db_state->AppendData(shard_header.add_prefix_count,
 610                                 shard_header.sub_prefix_count,
 611                                 shard_header.add_hash_count,
 612                                 shard_header.sub_hash_count,
 613                                 file.get(), &context)) {
 614         return false;
 615       }
 616
 617       in_min += in_stride;
 618     } while (in_min <= kMaxSBPrefix);
 619   }
 620
 621   if (!ReadAndVerifyChecksum(file.get(), &context))
 622     return false;
 623
 624   int64 size = 0;
 625   if (!base::GetFileSize(filename, &size))
 626     return false;
 627
 628   return static_cast<int64>(ftell(file.get())) == size;
 629 }
 630
 631 }  // namespace
 632
 633 // static
 634 void SafeBrowsingStoreFile::CheckForOriginalAndDelete(
 635     const base::FilePath& current_filename) {
 636   const base::FilePath original_filename(
 637       current_filename.DirName().AppendASCII("Safe Browsing"));
 638   if (base::PathExists(original_filename)) {
 639     int64 size = 0;
 640     if (base::GetFileSize(original_filename, &size)) {
 641       UMA_HISTOGRAM_COUNTS("SB2.OldDatabaseKilobytes",
 642                            static_cast<int>(size / 1024));
 643     }
 644
 645     if (base::DeleteFile(original_filename, false)) {
 646       RecordFormatEvent(FORMAT_EVENT_DELETED_ORIGINAL);
 647     } else {
 648       RecordFormatEvent(FORMAT_EVENT_DELETED_ORIGINAL_FAILED);
 649     }
 650
 651     // Just best-effort on the journal file, don't want to get lost in
 652     // the weeds.
 653     const base::FilePath journal_filename(
 654         current_filename.DirName().AppendASCII("Safe Browsing-journal"));
 655     base::DeleteFile(journal_filename, false);
 656   }
 657 }
 658
 659 SafeBrowsingStoreFile::SafeBrowsingStoreFile()
 660     : chunks_written_(0), empty_(false), corruption_seen_(false) {}
 661
 662 SafeBrowsingStoreFile::~SafeBrowsingStoreFile() {
 663   Close();
 664 }
 665
 666 bool SafeBrowsingStoreFile::Delete() {
 667   // The database should not be open at this point.  But, just in
 668   // case, close everything before deleting.
 669   if (!Close()) {
 670     NOTREACHED();
 671     return false;
 672   }
 673
 674   return DeleteStore(filename_);
 675 }
 676
 677 bool SafeBrowsingStoreFile::CheckValidity() {
 678   // The file was either empty or never opened.  The empty case is
 679   // presumed not to be invalid.  The never-opened case can happen if
 680   // BeginUpdate() fails for any databases, and should already have
 681   // caused the corruption callback to fire.
 682   if (!file_.get())
 683     return true;
 684
 685   if (!FileRewind(file_.get()))
 686     return OnCorruptDatabase();
 687
 688   int64 size = 0;
 689   if (!base::GetFileSize(filename_, &size))
 690     return OnCorruptDatabase();
 691
 692   base::MD5Context context;
 693   base::MD5Init(&context);
 694
 695   // Read everything except the final digest.
 696   size_t bytes_left = static_cast<size_t>(size);
 697   CHECK(size == static_cast<int64>(bytes_left));
 698   if (bytes_left < sizeof(base::MD5Digest))
 699     return OnCorruptDatabase();
 700   bytes_left -= sizeof(base::MD5Digest);
 701
 702   // Fold the contents of the file into the checksum.
 703   while (bytes_left > 0) {
 704     char buf[4096];
 705     const size_t c = std::min(sizeof(buf), bytes_left);
 706     const size_t ret = fread(buf, 1, c, file_.get());
 707
 708     // The file's size changed while reading, give up.
 709     if (ret != c)
 710       return OnCorruptDatabase();
 711     base::MD5Update(&context, base::StringPiece(buf, c));
 712     bytes_left -= c;
 713   }
 714
 715   if (!ReadAndVerifyChecksum(file_.get(), &context)) {
 716     RecordFormatEvent(FORMAT_EVENT_VALIDITY_CHECKSUM_FAILURE);
 717     return OnCorruptDatabase();
 718   }
 719
 720   return true;
 721 }
 722
 723 void SafeBrowsingStoreFile::Init(
 724     const base::FilePath& filename,
 725     const base::Closure& corruption_callback
 726 ) {
 727   filename_ = filename;
 728   corruption_callback_ = corruption_callback;
 729 }
 730
 731 bool SafeBrowsingStoreFile::BeginChunk() {
 732   return ClearChunkBuffers();
 733 }
 734
 735 bool SafeBrowsingStoreFile::WriteAddPrefix(int32 chunk_id, SBPrefix prefix) {
 736   add_prefixes_.push_back(SBAddPrefix(chunk_id, prefix));
 737   return true;
 738 }
 739
 740 bool SafeBrowsingStoreFile::GetAddPrefixes(SBAddPrefixes* add_prefixes) {
 741   add_prefixes->clear();
 742   if (!base::PathExists(filename_))
 743     return true;
 744
 745   StateInternal db_state;
 746   if (!ReadDbStateHelper(filename_, &db_state))
 747     return OnCorruptDatabase();
 748
 749   add_prefixes->swap(db_state.add_prefixes_);
 750   return true;
 751 }
 752
 753 bool SafeBrowsingStoreFile::GetAddFullHashes(
 754     std::vector<SBAddFullHash>* add_full_hashes) {
 755   add_full_hashes->clear();
 756   if (!base::PathExists(filename_))
 757     return true;
 758
 759   StateInternal db_state;
 760   if (!ReadDbStateHelper(filename_, &db_state))
 761     return OnCorruptDatabase();
 762
 763   add_full_hashes->swap(db_state.add_full_hashes_);
 764   return true;
 765 }
 766
 767 bool SafeBrowsingStoreFile::WriteAddHash(int32 chunk_id,
 768                                          const SBFullHash& full_hash) {
 769   add_hashes_.push_back(SBAddFullHash(chunk_id, full_hash));
 770   return true;
 771 }
 772
 773 bool SafeBrowsingStoreFile::WriteSubPrefix(int32 chunk_id,
 774                                            int32 add_chunk_id,
 775                                            SBPrefix prefix) {
 776   sub_prefixes_.push_back(SBSubPrefix(chunk_id, add_chunk_id, prefix));
 777   return true;
 778 }
 779
 780 bool SafeBrowsingStoreFile::WriteSubHash(int32 chunk_id, int32 add_chunk_id,
 781                                          const SBFullHash& full_hash) {
 782   sub_hashes_.push_back(SBSubFullHash(chunk_id, add_chunk_id, full_hash));
 783   return true;
 784 }
 785
 786 bool SafeBrowsingStoreFile::OnCorruptDatabase() {
 787   if (!corruption_seen_)
 788     RecordFormatEvent(FORMAT_EVENT_FILE_CORRUPT);
 789   corruption_seen_ = true;
 790
 791   corruption_callback_.Run();
 792
 793   // Return false as a convenience to callers.
 794   return false;
 795 }
 796
 797 bool SafeBrowsingStoreFile::Close() {
 798   ClearUpdateBuffers();
 799
 800   // Make sure the files are closed.
 801   file_.reset();
 802   new_file_.reset();
 803   return true;
 804 }
 805
 806 bool SafeBrowsingStoreFile::BeginUpdate() {
 807   DCHECK(!file_.get() && !new_file_.get());
 808
 809   // Structures should all be clear unless something bad happened.
 810   DCHECK(add_chunks_cache_.empty());
 811   DCHECK(sub_chunks_cache_.empty());
 812   DCHECK(add_del_cache_.empty());
 813   DCHECK(sub_del_cache_.empty());
 814   DCHECK(add_prefixes_.empty());
 815   DCHECK(sub_prefixes_.empty());
 816   DCHECK(add_hashes_.empty());
 817   DCHECK(sub_hashes_.empty());
 818   DCHECK_EQ(chunks_written_, 0);
 819
 820   // Since the following code will already hit the profile looking for
 821   // database files, this is a reasonable to time delete any old
 822   // files.
 823   CheckForOriginalAndDelete(filename_);
 824
 825   corruption_seen_ = false;
 826
 827   const base::FilePath new_filename = TemporaryFileForFilename(filename_);
 828   base::ScopedFILE new_file(base::OpenFile(new_filename, "wb+"));
 829   if (new_file.get() == NULL)
 830     return false;
 831
 832   base::ScopedFILE file(base::OpenFile(filename_, "rb"));
 833   empty_ = (file.get() == NULL);
 834   if (empty_) {
 835     // If the file exists but cannot be opened, try to delete it (not
 836     // deleting directly, the bloom filter needs to be deleted, too).
 837     if (base::PathExists(filename_))
 838       return OnCorruptDatabase();
 839
 840     new_file_.swap(new_file);
 841     return true;
 842   }
 843
 844   base::MD5Context context;
 845   FileHeader header;
 846   const int version =
 847       ReadAndVerifyHeader(filename_, &header,
 848                           &add_chunks_cache_, &sub_chunks_cache_,
 849                           file.get(), &context);
 850   if (version == kInvalidVersion) {
 851     FileHeaderV8 retry_header;
 852     if (FileRewind(file.get()) && ReadItem(&retry_header, file.get(), NULL) &&
 853         (retry_header.magic != kFileMagic ||
 854          (retry_header.version != 8 && retry_header.version != 7))) {
 855       // TODO(shess): Think on whether these histograms are generating any
 856       // actionable data.  I kid you not, SQLITE happens many thousands of times
 857       // per day, UNKNOWN about 3x higher than that.
 858       if (!strcmp(reinterpret_cast<char*>(&retry_header.magic),
 859                   "SQLite format 3")) {
 860         RecordFormatEvent(FORMAT_EVENT_FOUND_SQLITE);
 861       } else {
 862         RecordFormatEvent(FORMAT_EVENT_FOUND_UNKNOWN);
 863       }
 864     }
 865
 866     // Close the file so that it can be deleted.
 867     file.reset();
 868
 869     return OnCorruptDatabase();
 870   }
 871
 872   file_.swap(file);
 873   new_file_.swap(new_file);
 874   return true;
 875 }
 876
 877 bool SafeBrowsingStoreFile::FinishChunk() {
 878   if (!add_prefixes_.size() && !sub_prefixes_.size() &&
 879       !add_hashes_.size() && !sub_hashes_.size())
 880     return true;
 881
 882   ChunkHeader header;
 883   header.add_prefix_count = add_prefixes_.size();
 884   header.sub_prefix_count = sub_prefixes_.size();
 885   header.add_hash_count = add_hashes_.size();
 886   header.sub_hash_count = sub_hashes_.size();
 887   if (!WriteItem(header, new_file_.get(), NULL))
 888     return false;
 889
 890   if (!WriteContainer(add_prefixes_, new_file_.get(), NULL) ||
 891       !WriteContainer(sub_prefixes_, new_file_.get(), NULL) ||
 892       !WriteContainer(add_hashes_, new_file_.get(), NULL) ||
 893       !WriteContainer(sub_hashes_, new_file_.get(), NULL))
 894     return false;
 895
 896   ++chunks_written_;
 897
 898   // Clear everything to save memory.
 899   return ClearChunkBuffers();
 900 }
 901
 902 bool SafeBrowsingStoreFile::DoUpdate(
 903     safe_browsing::PrefixSetBuilder* builder,
 904     std::vector<SBAddFullHash>* add_full_hashes_result) {
 905   DCHECK(file_.get() || empty_);
 906   DCHECK(new_file_.get());
 907   CHECK(builder);
 908   CHECK(add_full_hashes_result);
 909
 910   // Rewind the temporary storage.
 911   if (!FileRewind(new_file_.get()))
 912     return false;
 913
 914   // Get chunk file's size for validating counts.
 915   int64 update_size = 0;
 916   if (!base::GetFileSize(TemporaryFileForFilename(filename_), &update_size))
 917     return OnCorruptDatabase();
 918
 919   // Track update size to answer questions at http://crbug.com/72216 .
 920   // Log small updates as 1k so that the 0 (underflow) bucket can be
 921   // used for "empty" in SafeBrowsingDatabase.
 922   UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes",
 923                        std::max(static_cast<int>(update_size / 1024), 1));
 924
 925   // Chunk updates to integrate.
 926   StateInternal new_state;
 927
 928   // Read update chunks.
 929   for (int i = 0; i < chunks_written_; ++i) {
 930     ChunkHeader header;
 931
 932     int64 ofs = ftell(new_file_.get());
 933     if (ofs == -1)
 934       return false;
 935
 936     if (!ReadItem(&header, new_file_.get(), NULL))
 937       return false;
 938
 939     // As a safety measure, make sure that the header describes a sane
 940     // chunk, given the remaining file size.
 941     int64 expected_size = ofs + sizeof(ChunkHeader);
 942     expected_size += header.add_prefix_count * sizeof(SBAddPrefix);
 943     expected_size += header.sub_prefix_count * sizeof(SBSubPrefix);
 944     expected_size += header.add_hash_count * sizeof(SBAddFullHash);
 945     expected_size += header.sub_hash_count * sizeof(SBSubFullHash);
 946     if (expected_size > update_size)
 947       return false;
 948
 949     if (!new_state.AppendData(header.add_prefix_count, header.sub_prefix_count,
 950                               header.add_hash_count, header.sub_hash_count,
 951                               new_file_.get(), NULL)) {
 952       return false;
 953     }
 954   }
 955
 956   // The state was accumulated by chunk, sort by prefix.
 957   new_state.SortData();
 958
 959   // These strides control how much data is loaded into memory per pass.
 960   // Strides must be an even power of two.  |in_stride| will be derived from the
 961   // input file.  |out_stride| will be derived from an estimate of the resulting
 962   // file's size.  |process_stride| will be the max of both.
 963   uint64 in_stride = kMaxShardStride;
 964   uint64 out_stride = kMaxShardStride;
 965   uint64 process_stride = 0;
 966
 967   // The header info is only used later if |!empty_|.  The v8 read loop only
 968   // needs |in_stride|, while v7 needs to refer to header information.
 969   base::MD5Context in_context;
 970   int version = kInvalidVersion;
 971   FileHeader header;
 972
 973   if (!empty_) {
 974     DCHECK(file_.get());
 975
 976     version = ReadAndVerifyHeader(filename_, &header,
 977                                   &add_chunks_cache_, &sub_chunks_cache_,
 978                                   file_.get(), &in_context);
 979     if (version == kInvalidVersion)
 980       return OnCorruptDatabase();
 981
 982     if (version == 8 && header.v8.shard_stride)
 983       in_stride = header.v8.shard_stride;
 984
 985     // The header checksum should have prevented this case, but the code will be
 986     // broken if this is not correct.
 987     if (!IsPowerOfTwo(in_stride))
 988       return OnCorruptDatabase();
 989   }
 990
 991   // We no longer need to track deleted chunks.
 992   DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_);
 993   DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_);
 994
 995   // Calculate |out_stride| to break the file down into reasonable shards.
 996   {
 997     int64 original_size = 0;
 998     if (!empty_ && !base::GetFileSize(filename_, &original_size))
 999       return OnCorruptDatabase();
1000
1001     // Approximate the final size as everything.  Subs and deletes will reduce
1002     // the size, but modest over-sharding won't hurt much.
1003     int64 shard_size = original_size + update_size;
1004
1005     // Keep splitting until a single stride of data fits the target.
1006     size_t shifts = 0;
1007     while (out_stride > kMinShardStride && shard_size > kUpdateStorageBytes) {
1008       out_stride >>= 1;
1009       shard_size >>= 1;
1010       ++shifts;
1011     }
1012     UMA_HISTOGRAM_COUNTS("SB2.OutShardShifts", shifts);
1013
1014     DCHECK(IsPowerOfTwo(out_stride));
1015   }
1016
1017   // Outer loop strides by the max of the input stride (to read integral shards)
1018   // and the output stride (to write integral shards).
1019   process_stride = std::max(in_stride, out_stride);
1020   DCHECK(IsPowerOfTwo(process_stride));
1021   DCHECK_EQ(0u, process_stride % in_stride);
1022   DCHECK_EQ(0u, process_stride % out_stride);
1023
1024   // Start writing the new data to |new_file_|.
1025   base::MD5Context out_context;
1026   if (!WriteHeader(out_stride, add_chunks_cache_, sub_chunks_cache_,
1027                    new_file_.get(), &out_context)) {
1028     return false;
1029   }
1030
1031   // Start at the beginning of the SBPrefix space.
1032   uint64 in_min = 0;
1033   uint64 out_min = 0;
1034   uint64 process_min = 0;
1035
1036   // Start at the beginning of the updates.
1037   StateInternalPos new_pos = new_state.StateBegin();
1038
1039   // Re-usable container for shard processing.
1040   StateInternal db_state;
1041
1042   // Track aggregate counts for histograms.
1043   size_t add_prefix_count = 0;
1044   size_t sub_prefix_count = 0;
1045
1046   do {
1047     // Maximum element in the current shard.
1048     SBPrefix process_max =
1049         static_cast<SBPrefix>(process_min + process_stride - 1);
1050     DCHECK_GT(process_max, process_min);
1051
1052     // Drop the data from previous pass.
1053     db_state.ClearData();
1054
1055     // Fill the processing shard with one or more input shards.
1056     if (!empty_) {
1057       if (version == 7) {
1058         // Treat v7 as a single-shard file.
1059         DCHECK_EQ(in_min, 0u);
1060         DCHECK_EQ(in_stride, kMaxShardStride);
1061         DCHECK_EQ(process_stride, kMaxShardStride);
1062         if (!db_state.AppendData(header.v7.add_prefix_count,
1063                                  header.v7.sub_prefix_count,
1064                                  header.v7.add_hash_count,
1065                                  header.v7.sub_hash_count,
1066                                  file_.get(), &in_context))
1067           return OnCorruptDatabase();
1068
1069         // v7 data is not sorted correctly.
1070         db_state.SortData();
1071       } else {
1072         do {
1073           ShardHeader shard_header;
1074           if (!ReadItem(&shard_header, file_.get(), &in_context))
1075             return OnCorruptDatabase();
1076
1077           if (!db_state.AppendData(shard_header.add_prefix_count,
1078                                    shard_header.sub_prefix_count,
1079                                    shard_header.add_hash_count,
1080                                    shard_header.sub_hash_count,
1081                                    file_.get(), &in_context))
1082             return OnCorruptDatabase();
1083
1084           in_min += in_stride;
1085         } while (in_min <= kMaxSBPrefix && in_min < process_max);
1086       }
1087     }
1088
1089     // Shard the update data to match the database data, then merge the update
1090     // data and process the results.
1091     {
1092       StateInternalPos new_end = new_state.ShardEnd(new_pos, process_max);
1093       db_state.MergeDataAndProcess(new_pos, new_end,
1094                                    add_del_cache_, sub_del_cache_);
1095       new_pos = new_end;
1096     }
1097
1098     // Collect the processed data for return to caller.
1099     for (size_t i = 0; i < db_state.add_prefixes_.size(); ++i) {
1100       builder->AddPrefix(db_state.add_prefixes_[i].prefix);
1101     }
1102     add_full_hashes_result->insert(add_full_hashes_result->end(),
1103                                    db_state.add_full_hashes_.begin(),
1104                                    db_state.add_full_hashes_.end());
1105     add_prefix_count += db_state.add_prefixes_.size();
1106     sub_prefix_count += db_state.sub_prefixes_.size();
1107
1108     // Write one or more shards of processed output.
1109     StateInternalPos out_pos = db_state.StateBegin();
1110     do {
1111       SBPrefix out_max = static_cast<SBPrefix>(out_min + out_stride - 1);
1112       DCHECK_GT(out_max, out_min);
1113
1114       StateInternalPos out_end = db_state.ShardEnd(out_pos, out_max);
1115       if (!db_state.WriteShard(out_pos, out_end, new_file_.get(), &out_context))
1116         return false;
1117       out_pos = out_end;
1118
1119       out_min += out_stride;
1120     } while (out_min == static_cast<SBPrefix>(out_min) &&
1121              out_min < process_max);
1122
1123     process_min += process_stride;
1124   } while (process_min <= kMaxSBPrefix);
1125
1126   // Verify the overall checksum.
1127   if (!empty_) {
1128     if (!ReadAndVerifyChecksum(file_.get(), &in_context)) {
1129       RecordFormatEvent(FORMAT_EVENT_UPDATE_CHECKSUM_FAILURE);
1130       return OnCorruptDatabase();
1131     }
1132
1133     // TODO(shess): Verify EOF?
1134
1135     // Close the input file so the new file can be renamed over it.
1136     file_.reset();
1137   }
1138   DCHECK(!file_.get());
1139
1140   // Write the overall checksum.
1141   base::MD5Digest out_digest;
1142   base::MD5Final(&out_digest, &out_context);
1143   if (!WriteItem(out_digest, new_file_.get(), NULL))
1144     return false;
1145
1146   // Trim any excess left over from the temporary chunk data.
1147   if (!base::TruncateFile(new_file_.get()))
1148     return false;
1149
1150   // Close the file handle and swizzle the file into place.
1151   new_file_.reset();
1152   if (!base::DeleteFile(filename_, false) &&
1153       base::PathExists(filename_))
1154     return false;
1155
1156   const base::FilePath new_filename = TemporaryFileForFilename(filename_);
1157   if (!base::Move(new_filename, filename_))
1158     return false;
1159
1160   // Record counts before swapping to caller.
1161   UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_prefix_count);
1162   UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", sub_prefix_count);
1163
1164   return true;
1165 }
1166
1167 bool SafeBrowsingStoreFile::FinishUpdate(
1168     safe_browsing::PrefixSetBuilder* builder,
1169     std::vector<SBAddFullHash>* add_full_hashes_result) {
1170   DCHECK(builder);
1171   DCHECK(add_full_hashes_result);
1172
1173   if (!DoUpdate(builder, add_full_hashes_result)) {
1174     CancelUpdate();
1175     return false;
1176   }
1177
1178   DCHECK(!new_file_.get());
1179   DCHECK(!file_.get());
1180
1181   return Close();
1182 }
1183
1184 bool SafeBrowsingStoreFile::CancelUpdate() {
1185   return Close();
1186 }
1187
1188 void SafeBrowsingStoreFile::SetAddChunk(int32 chunk_id) {
1189   add_chunks_cache_.insert(chunk_id);
1190 }
1191
1192 bool SafeBrowsingStoreFile::CheckAddChunk(int32 chunk_id) {
1193   return add_chunks_cache_.count(chunk_id) > 0;
1194 }
1195
1196 void SafeBrowsingStoreFile::GetAddChunks(std::vector<int32>* out) {
1197   out->clear();
1198   out->insert(out->end(), add_chunks_cache_.begin(), add_chunks_cache_.end());
1199 }
1200
1201 void SafeBrowsingStoreFile::SetSubChunk(int32 chunk_id) {
1202   sub_chunks_cache_.insert(chunk_id);
1203 }
1204
1205 bool SafeBrowsingStoreFile::CheckSubChunk(int32 chunk_id) {
1206   return sub_chunks_cache_.count(chunk_id) > 0;
1207 }
1208
1209 void SafeBrowsingStoreFile::GetSubChunks(std::vector<int32>* out) {
1210   out->clear();
1211   out->insert(out->end(), sub_chunks_cache_.begin(), sub_chunks_cache_.end());
1212 }
1213
1214 void SafeBrowsingStoreFile::DeleteAddChunk(int32 chunk_id) {
1215   add_del_cache_.insert(chunk_id);
1216 }
1217
1218 void SafeBrowsingStoreFile::DeleteSubChunk(int32 chunk_id) {
1219   sub_del_cache_.insert(chunk_id);
1220 }
1221
1222 // static
1223 bool SafeBrowsingStoreFile::DeleteStore(const base::FilePath& basename) {
1224   if (!base::DeleteFile(basename, false) &&
1225       base::PathExists(basename)) {
1226     NOTREACHED();
1227     return false;
1228   }
1229
1230   const base::FilePath new_filename = TemporaryFileForFilename(basename);
1231   if (!base::DeleteFile(new_filename, false) &&
1232       base::PathExists(new_filename)) {
1233     NOTREACHED();
1234     return false;
1235   }
1236
1237   // With SQLite support gone, one way to get to this code is if the
1238   // existing file is a SQLite file.  Make sure the journal file is
1239   // also removed.
1240   const base::FilePath journal_filename(
1241       basename.value() + FILE_PATH_LITERAL("-journal"));
1242   if (base::PathExists(journal_filename))
1243     base::DeleteFile(journal_filename, false);
1244
1245   return true;
1246 }