Roll src/third_party/WebKit d9c6159:8139f33 (svn 201974:201975)
[chromium-blink-merge.git] / net / disk_cache / simple / simple_index_file.cc
blobc013084041f301cbd5ee378eb0642436d09ba910
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/disk_cache/simple/simple_index_file.h"
7 #include <vector>
9 #include "base/files/file.h"
10 #include "base/files/file_util.h"
11 #include "base/files/memory_mapped_file.h"
12 #include "base/hash.h"
13 #include "base/logging.h"
14 #include "base/pickle.h"
15 #include "base/single_thread_task_runner.h"
16 #include "base/task_runner_util.h"
17 #include "base/threading/thread_restrictions.h"
18 #include "net/disk_cache/simple/simple_backend_version.h"
19 #include "net/disk_cache/simple/simple_entry_format.h"
20 #include "net/disk_cache/simple/simple_histogram_macros.h"
21 #include "net/disk_cache/simple/simple_index.h"
22 #include "net/disk_cache/simple/simple_synchronous_entry.h"
23 #include "net/disk_cache/simple/simple_util.h"
24 #include "third_party/zlib/zlib.h"
26 using base::File;
28 namespace disk_cache {
29 namespace {
31 const int kEntryFilesHashLength = 16;
32 const int kEntryFilesSuffixLength = 2;
34 const uint64 kMaxEntiresInIndex = 100000000;
36 uint32 CalculatePickleCRC(const base::Pickle& pickle) {
37 return crc32(crc32(0, Z_NULL, 0),
38 reinterpret_cast<const Bytef*>(pickle.payload()),
39 pickle.payload_size());
42 // Used in histograms. Please only add new values at the end.
43 enum IndexFileState {
44 INDEX_STATE_CORRUPT = 0,
45 INDEX_STATE_STALE = 1,
46 INDEX_STATE_FRESH = 2,
47 INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3,
48 INDEX_STATE_MAX = 4,
51 void UmaRecordIndexFileState(IndexFileState state, net::CacheType cache_type) {
52 SIMPLE_CACHE_UMA(ENUMERATION,
53 "IndexFileStateOnLoad", cache_type, state, INDEX_STATE_MAX);
56 // Used in histograms. Please only add new values at the end.
57 enum IndexInitMethod {
58 INITIALIZE_METHOD_RECOVERED = 0,
59 INITIALIZE_METHOD_LOADED = 1,
60 INITIALIZE_METHOD_NEWCACHE = 2,
61 INITIALIZE_METHOD_MAX = 3,
64 void UmaRecordIndexInitMethod(IndexInitMethod method,
65 net::CacheType cache_type) {
66 SIMPLE_CACHE_UMA(ENUMERATION,
67 "IndexInitializeMethod", cache_type,
68 method, INITIALIZE_METHOD_MAX);
71 bool WritePickleFile(base::Pickle* pickle, const base::FilePath& file_name) {
72 File file(
73 file_name,
74 File::FLAG_CREATE | File::FLAG_WRITE | File::FLAG_SHARE_DELETE);
75 if (!file.IsValid())
76 return false;
78 int bytes_written =
79 file.Write(0, static_cast<const char*>(pickle->data()), pickle->size());
80 if (bytes_written != implicit_cast<int>(pickle->size())) {
81 simple_util::SimpleCacheDeleteFile(file_name);
82 return false;
84 return true;
87 // Called for each cache directory traversal iteration.
88 void ProcessEntryFile(SimpleIndex::EntrySet* entries,
89 const base::FilePath& file_path) {
90 static const size_t kEntryFilesLength =
91 kEntryFilesHashLength + kEntryFilesSuffixLength;
92 // Converting to std::string is OK since we never use UTF8 wide chars in our
93 // file names.
94 const base::FilePath::StringType base_name = file_path.BaseName().value();
95 const std::string file_name(base_name.begin(), base_name.end());
96 if (file_name.size() != kEntryFilesLength)
97 return;
98 const base::StringPiece hash_string(
99 file_name.begin(), file_name.begin() + kEntryFilesHashLength);
100 uint64 hash_key = 0;
101 if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) {
102 LOG(WARNING) << "Invalid entry hash key filename while restoring index from"
103 << " disk: " << file_name;
104 return;
107 File::Info file_info;
108 if (!base::GetFileInfo(file_path, &file_info)) {
109 LOG(ERROR) << "Could not get file info for " << file_path.value();
110 return;
112 base::Time last_used_time;
113 #if defined(OS_POSIX)
114 // For POSIX systems, a last access time is available. However, it's not
115 // guaranteed to be more accurate than mtime. It is no worse though.
116 last_used_time = file_info.last_accessed;
117 #endif
118 if (last_used_time.is_null())
119 last_used_time = file_info.last_modified;
121 int64 file_size = file_info.size;
122 SimpleIndex::EntrySet::iterator it = entries->find(hash_key);
123 if (it == entries->end()) {
124 SimpleIndex::InsertInEntrySet(
125 hash_key,
126 EntryMetadata(last_used_time, file_size),
127 entries);
128 } else {
129 // Summing up the total size of the entry through all the *_[0-1] files
130 it->second.SetEntrySize(it->second.GetEntrySize() + file_size);
134 } // namespace
136 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
137 flush_required(false) {
140 SimpleIndexLoadResult::~SimpleIndexLoadResult() {
143 void SimpleIndexLoadResult::Reset() {
144 did_load = false;
145 flush_required = false;
146 entries.clear();
149 // static
150 const char SimpleIndexFile::kIndexFileName[] = "the-real-index";
151 // static
152 const char SimpleIndexFile::kIndexDirectory[] = "index-dir";
153 // static
154 const char SimpleIndexFile::kTempIndexFileName[] = "temp-index";
156 SimpleIndexFile::IndexMetadata::IndexMetadata()
157 : magic_number_(kSimpleIndexMagicNumber),
158 version_(kSimpleVersion),
159 number_of_entries_(0),
160 cache_size_(0) {}
162 SimpleIndexFile::IndexMetadata::IndexMetadata(
163 uint64 number_of_entries, uint64 cache_size)
164 : magic_number_(kSimpleIndexMagicNumber),
165 version_(kSimpleVersion),
166 number_of_entries_(number_of_entries),
167 cache_size_(cache_size) {}
169 void SimpleIndexFile::IndexMetadata::Serialize(base::Pickle* pickle) const {
170 DCHECK(pickle);
171 pickle->WriteUInt64(magic_number_);
172 pickle->WriteUInt32(version_);
173 pickle->WriteUInt64(number_of_entries_);
174 pickle->WriteUInt64(cache_size_);
177 // static
178 bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified,
179 base::Pickle* pickle) {
180 if (!pickle->WriteInt64(cache_modified.ToInternalValue()))
181 return false;
182 SimpleIndexFile::PickleHeader* header_p = pickle->headerT<PickleHeader>();
183 header_p->crc = CalculatePickleCRC(*pickle);
184 return true;
187 bool SimpleIndexFile::IndexMetadata::Deserialize(base::PickleIterator* it) {
188 DCHECK(it);
189 return it->ReadUInt64(&magic_number_) &&
190 it->ReadUInt32(&version_) &&
191 it->ReadUInt64(&number_of_entries_)&&
192 it->ReadUInt64(&cache_size_);
195 void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type,
196 const base::FilePath& cache_directory,
197 const base::FilePath& index_filename,
198 const base::FilePath& temp_index_filename,
199 scoped_ptr<base::Pickle> pickle,
200 const base::TimeTicks& start_time,
201 bool app_on_background) {
202 DCHECK_EQ(index_filename.DirName().value(),
203 temp_index_filename.DirName().value());
204 base::FilePath index_file_directory = temp_index_filename.DirName();
205 if (!base::DirectoryExists(index_file_directory) &&
206 !base::CreateDirectory(index_file_directory)) {
207 LOG(ERROR) << "Could not create a directory to hold the index file";
208 return;
211 // There is a chance that the index containing all the necessary data about
212 // newly created entries will appear to be stale. This can happen if on-disk
213 // part of a Create operation does not fit into the time budget for the index
214 // flush delay. This simple approach will be reconsidered if it does not allow
215 // for maintaining freshness.
216 base::Time cache_dir_mtime;
217 if (!simple_util::GetMTime(cache_directory, &cache_dir_mtime)) {
218 LOG(ERROR) << "Could obtain information about cache age";
219 return;
221 SerializeFinalData(cache_dir_mtime, pickle.get());
222 if (!WritePickleFile(pickle.get(), temp_index_filename)) {
223 LOG(ERROR) << "Failed to write the temporary index file";
224 return;
227 // Atomically rename the temporary index file to become the real one.
228 // TODO(gavinp): DCHECK when not shutting down, since that is very strange.
229 // The rename failing during shutdown is legal because it's legal to begin
230 // erasing a cache as soon as the destructor has been called.
231 if (!base::ReplaceFile(temp_index_filename, index_filename, NULL))
232 return;
234 if (app_on_background) {
235 SIMPLE_CACHE_UMA(TIMES,
236 "IndexWriteToDiskTime.Background", cache_type,
237 (base::TimeTicks::Now() - start_time));
238 } else {
239 SIMPLE_CACHE_UMA(TIMES,
240 "IndexWriteToDiskTime.Foreground", cache_type,
241 (base::TimeTicks::Now() - start_time));
245 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
246 return number_of_entries_ <= kMaxEntiresInIndex &&
247 magic_number_ == kSimpleIndexMagicNumber &&
248 version_ == kSimpleVersion;
251 SimpleIndexFile::SimpleIndexFile(
252 const scoped_refptr<base::SingleThreadTaskRunner>& cache_thread,
253 const scoped_refptr<base::TaskRunner>& worker_pool,
254 net::CacheType cache_type,
255 const base::FilePath& cache_directory)
256 : cache_thread_(cache_thread),
257 worker_pool_(worker_pool),
258 cache_type_(cache_type),
259 cache_directory_(cache_directory),
260 index_file_(cache_directory_.AppendASCII(kIndexDirectory)
261 .AppendASCII(kIndexFileName)),
262 temp_index_file_(cache_directory_.AppendASCII(kIndexDirectory)
263 .AppendASCII(kTempIndexFileName)) {
266 SimpleIndexFile::~SimpleIndexFile() {}
268 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified,
269 const base::Closure& callback,
270 SimpleIndexLoadResult* out_result) {
271 base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries,
272 cache_type_,
273 cache_last_modified, cache_directory_,
274 index_file_, out_result);
275 worker_pool_->PostTaskAndReply(FROM_HERE, task, callback);
278 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet& entry_set,
279 uint64 cache_size,
280 const base::TimeTicks& start,
281 bool app_on_background,
282 const base::Closure& callback) {
283 IndexMetadata index_metadata(entry_set.size(), cache_size);
284 scoped_ptr<base::Pickle> pickle = Serialize(index_metadata, entry_set);
285 base::Closure task =
286 base::Bind(&SimpleIndexFile::SyncWriteToDisk,
287 cache_type_, cache_directory_, index_file_, temp_index_file_,
288 base::Passed(&pickle), start, app_on_background);
289 if (callback.is_null())
290 cache_thread_->PostTask(FROM_HERE, task);
291 else
292 cache_thread_->PostTaskAndReply(FROM_HERE, task, callback);
295 // static
296 void SimpleIndexFile::SyncLoadIndexEntries(
297 net::CacheType cache_type,
298 base::Time cache_last_modified,
299 const base::FilePath& cache_directory,
300 const base::FilePath& index_file_path,
301 SimpleIndexLoadResult* out_result) {
302 // Load the index and find its age.
303 base::Time last_cache_seen_by_index;
304 SyncLoadFromDisk(index_file_path, &last_cache_seen_by_index, out_result);
306 // Consider the index loaded if it is fresh.
307 const bool index_file_existed = base::PathExists(index_file_path);
308 if (!out_result->did_load) {
309 if (index_file_existed)
310 UmaRecordIndexFileState(INDEX_STATE_CORRUPT, cache_type);
311 } else {
312 if (cache_last_modified <= last_cache_seen_by_index) {
313 base::Time latest_dir_mtime;
314 simple_util::GetMTime(cache_directory, &latest_dir_mtime);
315 if (LegacyIsIndexFileStale(latest_dir_mtime, index_file_path)) {
316 UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES,
317 cache_type);
318 } else {
319 UmaRecordIndexFileState(INDEX_STATE_FRESH, cache_type);
321 UmaRecordIndexInitMethod(INITIALIZE_METHOD_LOADED, cache_type);
322 return;
324 UmaRecordIndexFileState(INDEX_STATE_STALE, cache_type);
327 // Reconstruct the index by scanning the disk for entries.
328 const base::TimeTicks start = base::TimeTicks::Now();
329 SyncRestoreFromDisk(cache_directory, index_file_path, out_result);
330 SIMPLE_CACHE_UMA(MEDIUM_TIMES, "IndexRestoreTime", cache_type,
331 base::TimeTicks::Now() - start);
332 SIMPLE_CACHE_UMA(COUNTS, "IndexEntriesRestored", cache_type,
333 out_result->entries.size());
334 if (index_file_existed) {
335 UmaRecordIndexInitMethod(INITIALIZE_METHOD_RECOVERED, cache_type);
336 } else {
337 UmaRecordIndexInitMethod(INITIALIZE_METHOD_NEWCACHE, cache_type);
338 SIMPLE_CACHE_UMA(COUNTS,
339 "IndexCreatedEntryCount", cache_type,
340 out_result->entries.size());
344 // static
345 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename,
346 base::Time* out_last_cache_seen_by_index,
347 SimpleIndexLoadResult* out_result) {
348 out_result->Reset();
350 File file(index_filename,
351 File::FLAG_OPEN | File::FLAG_READ | File::FLAG_SHARE_DELETE);
352 if (!file.IsValid())
353 return;
355 base::MemoryMappedFile index_file_map;
356 if (!index_file_map.Initialize(file.Pass())) {
357 simple_util::SimpleCacheDeleteFile(index_filename);
358 return;
361 SimpleIndexFile::Deserialize(
362 reinterpret_cast<const char*>(index_file_map.data()),
363 index_file_map.length(),
364 out_last_cache_seen_by_index,
365 out_result);
367 if (!out_result->did_load)
368 simple_util::SimpleCacheDeleteFile(index_filename);
371 // static
372 scoped_ptr<base::Pickle> SimpleIndexFile::Serialize(
373 const SimpleIndexFile::IndexMetadata& index_metadata,
374 const SimpleIndex::EntrySet& entries) {
375 scoped_ptr<base::Pickle> pickle(
376 new base::Pickle(sizeof(SimpleIndexFile::PickleHeader)));
378 index_metadata.Serialize(pickle.get());
379 for (SimpleIndex::EntrySet::const_iterator it = entries.begin();
380 it != entries.end(); ++it) {
381 pickle->WriteUInt64(it->first);
382 it->second.Serialize(pickle.get());
384 return pickle.Pass();
387 // static
388 void SimpleIndexFile::Deserialize(const char* data, int data_len,
389 base::Time* out_cache_last_modified,
390 SimpleIndexLoadResult* out_result) {
391 DCHECK(data);
393 out_result->Reset();
394 SimpleIndex::EntrySet* entries = &out_result->entries;
396 base::Pickle pickle(data, data_len);
397 if (!pickle.data()) {
398 LOG(WARNING) << "Corrupt Simple Index File.";
399 return;
402 base::PickleIterator pickle_it(pickle);
403 SimpleIndexFile::PickleHeader* header_p =
404 pickle.headerT<SimpleIndexFile::PickleHeader>();
405 const uint32 crc_read = header_p->crc;
406 const uint32 crc_calculated = CalculatePickleCRC(pickle);
408 if (crc_read != crc_calculated) {
409 LOG(WARNING) << "Invalid CRC in Simple Index file.";
410 return;
413 SimpleIndexFile::IndexMetadata index_metadata;
414 if (!index_metadata.Deserialize(&pickle_it)) {
415 LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
416 return;
419 if (!index_metadata.CheckIndexMetadata()) {
420 LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
421 return;
424 #if !defined(OS_WIN)
425 // TODO(gavinp): Consider using std::unordered_map.
426 entries->resize(index_metadata.GetNumberOfEntries() + kExtraSizeForMerge);
427 #endif
428 while (entries->size() < index_metadata.GetNumberOfEntries()) {
429 uint64 hash_key;
430 EntryMetadata entry_metadata;
431 if (!pickle_it.ReadUInt64(&hash_key) ||
432 !entry_metadata.Deserialize(&pickle_it)) {
433 LOG(WARNING) << "Invalid EntryMetadata in Simple Index file.";
434 entries->clear();
435 return;
437 SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries);
440 int64 cache_last_modified;
441 if (!pickle_it.ReadInt64(&cache_last_modified)) {
442 entries->clear();
443 return;
445 DCHECK(out_cache_last_modified);
446 *out_cache_last_modified = base::Time::FromInternalValue(cache_last_modified);
448 out_result->did_load = true;
451 // static
452 void SimpleIndexFile::SyncRestoreFromDisk(
453 const base::FilePath& cache_directory,
454 const base::FilePath& index_file_path,
455 SimpleIndexLoadResult* out_result) {
456 VLOG(1) << "Simple Cache Index is being restored from disk.";
457 simple_util::SimpleCacheDeleteFile(index_file_path);
458 out_result->Reset();
459 SimpleIndex::EntrySet* entries = &out_result->entries;
461 const bool did_succeed = TraverseCacheDirectory(
462 cache_directory, base::Bind(&ProcessEntryFile, entries));
463 if (!did_succeed) {
464 LOG(ERROR) << "Could not reconstruct index from disk";
465 return;
467 out_result->did_load = true;
468 // When we restore from disk we write the merged index file to disk right
469 // away, this might save us from having to restore again next time.
470 out_result->flush_required = true;
473 // static
474 bool SimpleIndexFile::LegacyIsIndexFileStale(
475 base::Time cache_last_modified,
476 const base::FilePath& index_file_path) {
477 base::Time index_mtime;
478 if (!simple_util::GetMTime(index_file_path, &index_mtime))
479 return true;
480 return index_mtime < cache_last_modified;
483 } // namespace disk_cache