Roll src/third_party/WebKit eac3800:0237a66 (svn 202606:202607)
[chromium-blink-merge.git] / net / disk_cache / simple / simple_index_file.cc
blob11d37a95048b9093e57f3b4987dcd4c9a5d5e1e9
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/disk_cache/simple/simple_index_file.h"
7 #include <vector>
9 #include "base/files/file.h"
10 #include "base/files/file_util.h"
11 #include "base/files/memory_mapped_file.h"
12 #include "base/hash.h"
13 #include "base/logging.h"
14 #include "base/numerics/safe_conversions.h"
15 #include "base/pickle.h"
16 #include "base/single_thread_task_runner.h"
17 #include "base/task_runner_util.h"
18 #include "base/threading/thread_restrictions.h"
19 #include "net/disk_cache/simple/simple_backend_version.h"
20 #include "net/disk_cache/simple/simple_entry_format.h"
21 #include "net/disk_cache/simple/simple_histogram_macros.h"
22 #include "net/disk_cache/simple/simple_index.h"
23 #include "net/disk_cache/simple/simple_synchronous_entry.h"
24 #include "net/disk_cache/simple/simple_util.h"
25 #include "third_party/zlib/zlib.h"
27 using base::File;
29 namespace disk_cache {
30 namespace {
32 const int kEntryFilesHashLength = 16;
33 const int kEntryFilesSuffixLength = 2;
35 const uint64 kMaxEntiresInIndex = 100000000;
37 uint32 CalculatePickleCRC(const base::Pickle& pickle) {
38 return crc32(crc32(0, Z_NULL, 0),
39 reinterpret_cast<const Bytef*>(pickle.payload()),
40 pickle.payload_size());
// State of the on-disk index as observed when it is loaded.
// Used in histograms. Please only add new values at the end.
enum IndexFileState {
  // An index file was present but could not be loaded.
  INDEX_STATE_CORRUPT = 0,
  // The index loaded, but the cache was modified after the time it recorded.
  INDEX_STATE_STALE = 1,
  // The index loaded and is up to date.
  INDEX_STATE_FRESH = 2,
  // The index loaded and is up to date, but the legacy mtime comparison
  // would have considered it stale.
  INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3,
  // Exclusive upper bound handed to the enumeration histogram.
  INDEX_STATE_MAX = 4,
};
52 void UmaRecordIndexFileState(IndexFileState state, net::CacheType cache_type) {
53 SIMPLE_CACHE_UMA(ENUMERATION,
54 "IndexFileStateOnLoad", cache_type, state, INDEX_STATE_MAX);
// How the in-memory index ended up being populated at startup.
// Used in histograms. Please only add new values at the end.
enum IndexInitMethod {
  // An index file existed but the index was rebuilt by scanning the disk.
  INITIALIZE_METHOD_RECOVERED = 0,
  // The index was read from a fresh index file.
  INITIALIZE_METHOD_LOADED = 1,
  // No index file existed; the index was built by scanning the disk.
  INITIALIZE_METHOD_NEWCACHE = 2,
  // Exclusive upper bound handed to the enumeration histogram.
  INITIALIZE_METHOD_MAX = 3,
};
65 void UmaRecordIndexInitMethod(IndexInitMethod method,
66 net::CacheType cache_type) {
67 SIMPLE_CACHE_UMA(ENUMERATION,
68 "IndexInitializeMethod", cache_type,
69 method, INITIALIZE_METHOD_MAX);
72 bool WritePickleFile(base::Pickle* pickle, const base::FilePath& file_name) {
73 File file(
74 file_name,
75 File::FLAG_CREATE | File::FLAG_WRITE | File::FLAG_SHARE_DELETE);
76 if (!file.IsValid())
77 return false;
79 int bytes_written =
80 file.Write(0, static_cast<const char*>(pickle->data()), pickle->size());
81 if (bytes_written != base::checked_cast<int>(pickle->size())) {
82 simple_util::SimpleCacheDeleteFile(file_name);
83 return false;
85 return true;
88 // Called for each cache directory traversal iteration.
89 void ProcessEntryFile(SimpleIndex::EntrySet* entries,
90 const base::FilePath& file_path) {
91 static const size_t kEntryFilesLength =
92 kEntryFilesHashLength + kEntryFilesSuffixLength;
93 // Converting to std::string is OK since we never use UTF8 wide chars in our
94 // file names.
95 const base::FilePath::StringType base_name = file_path.BaseName().value();
96 const std::string file_name(base_name.begin(), base_name.end());
97 if (file_name.size() != kEntryFilesLength)
98 return;
99 const base::StringPiece hash_string(
100 file_name.begin(), file_name.begin() + kEntryFilesHashLength);
101 uint64 hash_key = 0;
102 if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) {
103 LOG(WARNING) << "Invalid entry hash key filename while restoring index from"
104 << " disk: " << file_name;
105 return;
108 File::Info file_info;
109 if (!base::GetFileInfo(file_path, &file_info)) {
110 LOG(ERROR) << "Could not get file info for " << file_path.value();
111 return;
113 base::Time last_used_time;
114 #if defined(OS_POSIX)
115 // For POSIX systems, a last access time is available. However, it's not
116 // guaranteed to be more accurate than mtime. It is no worse though.
117 last_used_time = file_info.last_accessed;
118 #endif
119 if (last_used_time.is_null())
120 last_used_time = file_info.last_modified;
122 int64 file_size = file_info.size;
123 SimpleIndex::EntrySet::iterator it = entries->find(hash_key);
124 if (it == entries->end()) {
125 SimpleIndex::InsertInEntrySet(
126 hash_key,
127 EntryMetadata(last_used_time, file_size),
128 entries);
129 } else {
130 // Summing up the total size of the entry through all the *_[0-1] files
131 it->second.SetEntrySize(it->second.GetEntrySize() + file_size);
135 } // namespace
137 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
138 flush_required(false) {
141 SimpleIndexLoadResult::~SimpleIndexLoadResult() {
144 void SimpleIndexLoadResult::Reset() {
145 did_load = false;
146 flush_required = false;
147 entries.clear();
150 // static
151 const char SimpleIndexFile::kIndexFileName[] = "the-real-index";
152 // static
153 const char SimpleIndexFile::kIndexDirectory[] = "index-dir";
154 // static
155 const char SimpleIndexFile::kTempIndexFileName[] = "temp-index";
157 SimpleIndexFile::IndexMetadata::IndexMetadata()
158 : magic_number_(kSimpleIndexMagicNumber),
159 version_(kSimpleVersion),
160 number_of_entries_(0),
161 cache_size_(0) {}
163 SimpleIndexFile::IndexMetadata::IndexMetadata(
164 uint64 number_of_entries, uint64 cache_size)
165 : magic_number_(kSimpleIndexMagicNumber),
166 version_(kSimpleVersion),
167 number_of_entries_(number_of_entries),
168 cache_size_(cache_size) {}
170 void SimpleIndexFile::IndexMetadata::Serialize(base::Pickle* pickle) const {
171 DCHECK(pickle);
172 pickle->WriteUInt64(magic_number_);
173 pickle->WriteUInt32(version_);
174 pickle->WriteUInt64(number_of_entries_);
175 pickle->WriteUInt64(cache_size_);
178 // static
179 bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified,
180 base::Pickle* pickle) {
181 if (!pickle->WriteInt64(cache_modified.ToInternalValue()))
182 return false;
183 SimpleIndexFile::PickleHeader* header_p = pickle->headerT<PickleHeader>();
184 header_p->crc = CalculatePickleCRC(*pickle);
185 return true;
188 bool SimpleIndexFile::IndexMetadata::Deserialize(base::PickleIterator* it) {
189 DCHECK(it);
190 return it->ReadUInt64(&magic_number_) &&
191 it->ReadUInt32(&version_) &&
192 it->ReadUInt64(&number_of_entries_)&&
193 it->ReadUInt64(&cache_size_);
196 void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type,
197 const base::FilePath& cache_directory,
198 const base::FilePath& index_filename,
199 const base::FilePath& temp_index_filename,
200 scoped_ptr<base::Pickle> pickle,
201 const base::TimeTicks& start_time,
202 bool app_on_background) {
203 DCHECK_EQ(index_filename.DirName().value(),
204 temp_index_filename.DirName().value());
205 base::FilePath index_file_directory = temp_index_filename.DirName();
206 if (!base::DirectoryExists(index_file_directory) &&
207 !base::CreateDirectory(index_file_directory)) {
208 LOG(ERROR) << "Could not create a directory to hold the index file";
209 return;
212 // There is a chance that the index containing all the necessary data about
213 // newly created entries will appear to be stale. This can happen if on-disk
214 // part of a Create operation does not fit into the time budget for the index
215 // flush delay. This simple approach will be reconsidered if it does not allow
216 // for maintaining freshness.
217 base::Time cache_dir_mtime;
218 if (!simple_util::GetMTime(cache_directory, &cache_dir_mtime)) {
219 LOG(ERROR) << "Could obtain information about cache age";
220 return;
222 SerializeFinalData(cache_dir_mtime, pickle.get());
223 if (!WritePickleFile(pickle.get(), temp_index_filename)) {
224 LOG(ERROR) << "Failed to write the temporary index file";
225 return;
228 // Atomically rename the temporary index file to become the real one.
229 // TODO(gavinp): DCHECK when not shutting down, since that is very strange.
230 // The rename failing during shutdown is legal because it's legal to begin
231 // erasing a cache as soon as the destructor has been called.
232 if (!base::ReplaceFile(temp_index_filename, index_filename, NULL))
233 return;
235 if (app_on_background) {
236 SIMPLE_CACHE_UMA(TIMES,
237 "IndexWriteToDiskTime.Background", cache_type,
238 (base::TimeTicks::Now() - start_time));
239 } else {
240 SIMPLE_CACHE_UMA(TIMES,
241 "IndexWriteToDiskTime.Foreground", cache_type,
242 (base::TimeTicks::Now() - start_time));
246 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
247 return number_of_entries_ <= kMaxEntiresInIndex &&
248 magic_number_ == kSimpleIndexMagicNumber &&
249 version_ == kSimpleVersion;
252 SimpleIndexFile::SimpleIndexFile(
253 const scoped_refptr<base::SingleThreadTaskRunner>& cache_thread,
254 const scoped_refptr<base::TaskRunner>& worker_pool,
255 net::CacheType cache_type,
256 const base::FilePath& cache_directory)
257 : cache_thread_(cache_thread),
258 worker_pool_(worker_pool),
259 cache_type_(cache_type),
260 cache_directory_(cache_directory),
261 index_file_(cache_directory_.AppendASCII(kIndexDirectory)
262 .AppendASCII(kIndexFileName)),
263 temp_index_file_(cache_directory_.AppendASCII(kIndexDirectory)
264 .AppendASCII(kTempIndexFileName)) {
267 SimpleIndexFile::~SimpleIndexFile() {}
269 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified,
270 const base::Closure& callback,
271 SimpleIndexLoadResult* out_result) {
272 base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries,
273 cache_type_,
274 cache_last_modified, cache_directory_,
275 index_file_, out_result);
276 worker_pool_->PostTaskAndReply(FROM_HERE, task, callback);
279 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet& entry_set,
280 uint64 cache_size,
281 const base::TimeTicks& start,
282 bool app_on_background,
283 const base::Closure& callback) {
284 IndexMetadata index_metadata(entry_set.size(), cache_size);
285 scoped_ptr<base::Pickle> pickle = Serialize(index_metadata, entry_set);
286 base::Closure task =
287 base::Bind(&SimpleIndexFile::SyncWriteToDisk,
288 cache_type_, cache_directory_, index_file_, temp_index_file_,
289 base::Passed(&pickle), start, app_on_background);
290 if (callback.is_null())
291 cache_thread_->PostTask(FROM_HERE, task);
292 else
293 cache_thread_->PostTaskAndReply(FROM_HERE, task, callback);
296 // static
297 void SimpleIndexFile::SyncLoadIndexEntries(
298 net::CacheType cache_type,
299 base::Time cache_last_modified,
300 const base::FilePath& cache_directory,
301 const base::FilePath& index_file_path,
302 SimpleIndexLoadResult* out_result) {
303 // Load the index and find its age.
304 base::Time last_cache_seen_by_index;
305 SyncLoadFromDisk(index_file_path, &last_cache_seen_by_index, out_result);
307 // Consider the index loaded if it is fresh.
308 const bool index_file_existed = base::PathExists(index_file_path);
309 if (!out_result->did_load) {
310 if (index_file_existed)
311 UmaRecordIndexFileState(INDEX_STATE_CORRUPT, cache_type);
312 } else {
313 if (cache_last_modified <= last_cache_seen_by_index) {
314 base::Time latest_dir_mtime;
315 simple_util::GetMTime(cache_directory, &latest_dir_mtime);
316 if (LegacyIsIndexFileStale(latest_dir_mtime, index_file_path)) {
317 UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES,
318 cache_type);
319 } else {
320 UmaRecordIndexFileState(INDEX_STATE_FRESH, cache_type);
322 UmaRecordIndexInitMethod(INITIALIZE_METHOD_LOADED, cache_type);
323 return;
325 UmaRecordIndexFileState(INDEX_STATE_STALE, cache_type);
328 // Reconstruct the index by scanning the disk for entries.
329 const base::TimeTicks start = base::TimeTicks::Now();
330 SyncRestoreFromDisk(cache_directory, index_file_path, out_result);
331 SIMPLE_CACHE_UMA(MEDIUM_TIMES, "IndexRestoreTime", cache_type,
332 base::TimeTicks::Now() - start);
333 SIMPLE_CACHE_UMA(COUNTS, "IndexEntriesRestored", cache_type,
334 out_result->entries.size());
335 if (index_file_existed) {
336 UmaRecordIndexInitMethod(INITIALIZE_METHOD_RECOVERED, cache_type);
337 } else {
338 UmaRecordIndexInitMethod(INITIALIZE_METHOD_NEWCACHE, cache_type);
339 SIMPLE_CACHE_UMA(COUNTS,
340 "IndexCreatedEntryCount", cache_type,
341 out_result->entries.size());
345 // static
346 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename,
347 base::Time* out_last_cache_seen_by_index,
348 SimpleIndexLoadResult* out_result) {
349 out_result->Reset();
351 File file(index_filename,
352 File::FLAG_OPEN | File::FLAG_READ | File::FLAG_SHARE_DELETE);
353 if (!file.IsValid())
354 return;
356 base::MemoryMappedFile index_file_map;
357 if (!index_file_map.Initialize(file.Pass())) {
358 simple_util::SimpleCacheDeleteFile(index_filename);
359 return;
362 SimpleIndexFile::Deserialize(
363 reinterpret_cast<const char*>(index_file_map.data()),
364 index_file_map.length(),
365 out_last_cache_seen_by_index,
366 out_result);
368 if (!out_result->did_load)
369 simple_util::SimpleCacheDeleteFile(index_filename);
372 // static
373 scoped_ptr<base::Pickle> SimpleIndexFile::Serialize(
374 const SimpleIndexFile::IndexMetadata& index_metadata,
375 const SimpleIndex::EntrySet& entries) {
376 scoped_ptr<base::Pickle> pickle(
377 new base::Pickle(sizeof(SimpleIndexFile::PickleHeader)));
379 index_metadata.Serialize(pickle.get());
380 for (SimpleIndex::EntrySet::const_iterator it = entries.begin();
381 it != entries.end(); ++it) {
382 pickle->WriteUInt64(it->first);
383 it->second.Serialize(pickle.get());
385 return pickle.Pass();
388 // static
389 void SimpleIndexFile::Deserialize(const char* data, int data_len,
390 base::Time* out_cache_last_modified,
391 SimpleIndexLoadResult* out_result) {
392 DCHECK(data);
394 out_result->Reset();
395 SimpleIndex::EntrySet* entries = &out_result->entries;
397 base::Pickle pickle(data, data_len);
398 if (!pickle.data()) {
399 LOG(WARNING) << "Corrupt Simple Index File.";
400 return;
403 base::PickleIterator pickle_it(pickle);
404 SimpleIndexFile::PickleHeader* header_p =
405 pickle.headerT<SimpleIndexFile::PickleHeader>();
406 const uint32 crc_read = header_p->crc;
407 const uint32 crc_calculated = CalculatePickleCRC(pickle);
409 if (crc_read != crc_calculated) {
410 LOG(WARNING) << "Invalid CRC in Simple Index file.";
411 return;
414 SimpleIndexFile::IndexMetadata index_metadata;
415 if (!index_metadata.Deserialize(&pickle_it)) {
416 LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
417 return;
420 if (!index_metadata.CheckIndexMetadata()) {
421 LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
422 return;
425 #if !defined(OS_WIN)
426 // TODO(gavinp): Consider using std::unordered_map.
427 entries->resize(index_metadata.GetNumberOfEntries() + kExtraSizeForMerge);
428 #endif
429 while (entries->size() < index_metadata.GetNumberOfEntries()) {
430 uint64 hash_key;
431 EntryMetadata entry_metadata;
432 if (!pickle_it.ReadUInt64(&hash_key) ||
433 !entry_metadata.Deserialize(&pickle_it)) {
434 LOG(WARNING) << "Invalid EntryMetadata in Simple Index file.";
435 entries->clear();
436 return;
438 SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries);
441 int64 cache_last_modified;
442 if (!pickle_it.ReadInt64(&cache_last_modified)) {
443 entries->clear();
444 return;
446 DCHECK(out_cache_last_modified);
447 *out_cache_last_modified = base::Time::FromInternalValue(cache_last_modified);
449 out_result->did_load = true;
452 // static
453 void SimpleIndexFile::SyncRestoreFromDisk(
454 const base::FilePath& cache_directory,
455 const base::FilePath& index_file_path,
456 SimpleIndexLoadResult* out_result) {
457 VLOG(1) << "Simple Cache Index is being restored from disk.";
458 simple_util::SimpleCacheDeleteFile(index_file_path);
459 out_result->Reset();
460 SimpleIndex::EntrySet* entries = &out_result->entries;
462 const bool did_succeed = TraverseCacheDirectory(
463 cache_directory, base::Bind(&ProcessEntryFile, entries));
464 if (!did_succeed) {
465 LOG(ERROR) << "Could not reconstruct index from disk";
466 return;
468 out_result->did_load = true;
469 // When we restore from disk we write the merged index file to disk right
470 // away, this might save us from having to restore again next time.
471 out_result->flush_required = true;
474 // static
475 bool SimpleIndexFile::LegacyIsIndexFileStale(
476 base::Time cache_last_modified,
477 const base::FilePath& index_file_path) {
478 base::Time index_mtime;
479 if (!simple_util::GetMTime(index_file_path, &index_mtime))
480 return true;
481 return index_mtime < cache_last_modified;
484 } // namespace disk_cache