1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/disk_cache/simple/simple_index_file.h"
9 #include "base/files/file.h"
10 #include "base/files/file_util.h"
11 #include "base/files/memory_mapped_file.h"
12 #include "base/hash.h"
13 #include "base/logging.h"
14 #include "base/pickle.h"
15 #include "base/single_thread_task_runner.h"
16 #include "base/task_runner_util.h"
17 #include "base/threading/thread_restrictions.h"
18 #include "net/disk_cache/simple/simple_backend_version.h"
19 #include "net/disk_cache/simple/simple_entry_format.h"
20 #include "net/disk_cache/simple/simple_histogram_macros.h"
21 #include "net/disk_cache/simple/simple_index.h"
22 #include "net/disk_cache/simple/simple_synchronous_entry.h"
23 #include "net/disk_cache/simple/simple_util.h"
24 #include "third_party/zlib/zlib.h"
28 namespace disk_cache
{
31 const int kEntryFilesHashLength
= 16;
32 const int kEntryFilesSuffixLength
= 2;
34 const uint64 kMaxEntiresInIndex
= 100000000;
36 uint32
CalculatePickleCRC(const base::Pickle
& pickle
) {
37 return crc32(crc32(0, Z_NULL
, 0),
38 reinterpret_cast<const Bytef
*>(pickle
.payload()),
39 pickle
.payload_size());
42 // Used in histograms. Please only add new values at the end.
44 INDEX_STATE_CORRUPT
= 0,
45 INDEX_STATE_STALE
= 1,
46 INDEX_STATE_FRESH
= 2,
47 INDEX_STATE_FRESH_CONCURRENT_UPDATES
= 3,
51 void UmaRecordIndexFileState(IndexFileState state
, net::CacheType cache_type
) {
52 SIMPLE_CACHE_UMA(ENUMERATION
,
53 "IndexFileStateOnLoad", cache_type
, state
, INDEX_STATE_MAX
);
56 // Used in histograms. Please only add new values at the end.
57 enum IndexInitMethod
{
58 INITIALIZE_METHOD_RECOVERED
= 0,
59 INITIALIZE_METHOD_LOADED
= 1,
60 INITIALIZE_METHOD_NEWCACHE
= 2,
61 INITIALIZE_METHOD_MAX
= 3,
64 void UmaRecordIndexInitMethod(IndexInitMethod method
,
65 net::CacheType cache_type
) {
66 SIMPLE_CACHE_UMA(ENUMERATION
,
67 "IndexInitializeMethod", cache_type
,
68 method
, INITIALIZE_METHOD_MAX
);
71 bool WritePickleFile(base::Pickle
* pickle
, const base::FilePath
& file_name
) {
74 File::FLAG_CREATE
| File::FLAG_WRITE
| File::FLAG_SHARE_DELETE
);
79 file
.Write(0, static_cast<const char*>(pickle
->data()), pickle
->size());
80 if (bytes_written
!= implicit_cast
<int>(pickle
->size())) {
81 simple_util::SimpleCacheDeleteFile(file_name
);
87 // Called for each cache directory traversal iteration.
88 void ProcessEntryFile(SimpleIndex::EntrySet
* entries
,
89 const base::FilePath
& file_path
) {
90 static const size_t kEntryFilesLength
=
91 kEntryFilesHashLength
+ kEntryFilesSuffixLength
;
92 // Converting to std::string is OK since we never use UTF8 wide chars in our
94 const base::FilePath::StringType base_name
= file_path
.BaseName().value();
95 const std::string
file_name(base_name
.begin(), base_name
.end());
96 if (file_name
.size() != kEntryFilesLength
)
98 const base::StringPiece
hash_string(
99 file_name
.begin(), file_name
.begin() + kEntryFilesHashLength
);
101 if (!simple_util::GetEntryHashKeyFromHexString(hash_string
, &hash_key
)) {
102 LOG(WARNING
) << "Invalid entry hash key filename while restoring index from"
103 << " disk: " << file_name
;
107 File::Info file_info
;
108 if (!base::GetFileInfo(file_path
, &file_info
)) {
109 LOG(ERROR
) << "Could not get file info for " << file_path
.value();
112 base::Time last_used_time
;
113 #if defined(OS_POSIX)
114 // For POSIX systems, a last access time is available. However, it's not
115 // guaranteed to be more accurate than mtime. It is no worse though.
116 last_used_time
= file_info
.last_accessed
;
118 if (last_used_time
.is_null())
119 last_used_time
= file_info
.last_modified
;
121 int64 file_size
= file_info
.size
;
122 SimpleIndex::EntrySet::iterator it
= entries
->find(hash_key
);
123 if (it
== entries
->end()) {
124 SimpleIndex::InsertInEntrySet(
126 EntryMetadata(last_used_time
, file_size
),
129 // Summing up the total size of the entry through all the *_[0-1] files
130 it
->second
.SetEntrySize(it
->second
.GetEntrySize() + file_size
);
136 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
137 flush_required(false) {
140 SimpleIndexLoadResult::~SimpleIndexLoadResult() {
143 void SimpleIndexLoadResult::Reset() {
145 flush_required
= false;
150 const char SimpleIndexFile::kIndexFileName
[] = "the-real-index";
152 const char SimpleIndexFile::kIndexDirectory
[] = "index-dir";
154 const char SimpleIndexFile::kTempIndexFileName
[] = "temp-index";
156 SimpleIndexFile::IndexMetadata::IndexMetadata()
157 : magic_number_(kSimpleIndexMagicNumber
),
158 version_(kSimpleVersion
),
159 number_of_entries_(0),
162 SimpleIndexFile::IndexMetadata::IndexMetadata(
163 uint64 number_of_entries
, uint64 cache_size
)
164 : magic_number_(kSimpleIndexMagicNumber
),
165 version_(kSimpleVersion
),
166 number_of_entries_(number_of_entries
),
167 cache_size_(cache_size
) {}
169 void SimpleIndexFile::IndexMetadata::Serialize(base::Pickle
* pickle
) const {
171 pickle
->WriteUInt64(magic_number_
);
172 pickle
->WriteUInt32(version_
);
173 pickle
->WriteUInt64(number_of_entries_
);
174 pickle
->WriteUInt64(cache_size_
);
178 bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified
,
179 base::Pickle
* pickle
) {
180 if (!pickle
->WriteInt64(cache_modified
.ToInternalValue()))
182 SimpleIndexFile::PickleHeader
* header_p
= pickle
->headerT
<PickleHeader
>();
183 header_p
->crc
= CalculatePickleCRC(*pickle
);
187 bool SimpleIndexFile::IndexMetadata::Deserialize(base::PickleIterator
* it
) {
189 return it
->ReadUInt64(&magic_number_
) &&
190 it
->ReadUInt32(&version_
) &&
191 it
->ReadUInt64(&number_of_entries_
)&&
192 it
->ReadUInt64(&cache_size_
);
195 void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type
,
196 const base::FilePath
& cache_directory
,
197 const base::FilePath
& index_filename
,
198 const base::FilePath
& temp_index_filename
,
199 scoped_ptr
<base::Pickle
> pickle
,
200 const base::TimeTicks
& start_time
,
201 bool app_on_background
) {
202 DCHECK_EQ(index_filename
.DirName().value(),
203 temp_index_filename
.DirName().value());
204 base::FilePath index_file_directory
= temp_index_filename
.DirName();
205 if (!base::DirectoryExists(index_file_directory
) &&
206 !base::CreateDirectory(index_file_directory
)) {
207 LOG(ERROR
) << "Could not create a directory to hold the index file";
211 // There is a chance that the index containing all the necessary data about
212 // newly created entries will appear to be stale. This can happen if on-disk
213 // part of a Create operation does not fit into the time budget for the index
214 // flush delay. This simple approach will be reconsidered if it does not allow
215 // for maintaining freshness.
216 base::Time cache_dir_mtime
;
217 if (!simple_util::GetMTime(cache_directory
, &cache_dir_mtime
)) {
218 LOG(ERROR
) << "Could obtain information about cache age";
221 SerializeFinalData(cache_dir_mtime
, pickle
.get());
222 if (!WritePickleFile(pickle
.get(), temp_index_filename
)) {
223 LOG(ERROR
) << "Failed to write the temporary index file";
227 // Atomically rename the temporary index file to become the real one.
228 // TODO(gavinp): DCHECK when not shutting down, since that is very strange.
229 // The rename failing during shutdown is legal because it's legal to begin
230 // erasing a cache as soon as the destructor has been called.
231 if (!base::ReplaceFile(temp_index_filename
, index_filename
, NULL
))
234 if (app_on_background
) {
235 SIMPLE_CACHE_UMA(TIMES
,
236 "IndexWriteToDiskTime.Background", cache_type
,
237 (base::TimeTicks::Now() - start_time
));
239 SIMPLE_CACHE_UMA(TIMES
,
240 "IndexWriteToDiskTime.Foreground", cache_type
,
241 (base::TimeTicks::Now() - start_time
));
245 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
246 return number_of_entries_
<= kMaxEntiresInIndex
&&
247 magic_number_
== kSimpleIndexMagicNumber
&&
248 version_
== kSimpleVersion
;
251 SimpleIndexFile::SimpleIndexFile(
252 const scoped_refptr
<base::SingleThreadTaskRunner
>& cache_thread
,
253 const scoped_refptr
<base::TaskRunner
>& worker_pool
,
254 net::CacheType cache_type
,
255 const base::FilePath
& cache_directory
)
256 : cache_thread_(cache_thread
),
257 worker_pool_(worker_pool
),
258 cache_type_(cache_type
),
259 cache_directory_(cache_directory
),
260 index_file_(cache_directory_
.AppendASCII(kIndexDirectory
)
261 .AppendASCII(kIndexFileName
)),
262 temp_index_file_(cache_directory_
.AppendASCII(kIndexDirectory
)
263 .AppendASCII(kTempIndexFileName
)) {
266 SimpleIndexFile::~SimpleIndexFile() {}
268 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified
,
269 const base::Closure
& callback
,
270 SimpleIndexLoadResult
* out_result
) {
271 base::Closure task
= base::Bind(&SimpleIndexFile::SyncLoadIndexEntries
,
273 cache_last_modified
, cache_directory_
,
274 index_file_
, out_result
);
275 worker_pool_
->PostTaskAndReply(FROM_HERE
, task
, callback
);
278 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet
& entry_set
,
280 const base::TimeTicks
& start
,
281 bool app_on_background
,
282 const base::Closure
& callback
) {
283 IndexMetadata
index_metadata(entry_set
.size(), cache_size
);
284 scoped_ptr
<base::Pickle
> pickle
= Serialize(index_metadata
, entry_set
);
286 base::Bind(&SimpleIndexFile::SyncWriteToDisk
,
287 cache_type_
, cache_directory_
, index_file_
, temp_index_file_
,
288 base::Passed(&pickle
), start
, app_on_background
);
289 if (callback
.is_null())
290 cache_thread_
->PostTask(FROM_HERE
, task
);
292 cache_thread_
->PostTaskAndReply(FROM_HERE
, task
, callback
);
296 void SimpleIndexFile::SyncLoadIndexEntries(
297 net::CacheType cache_type
,
298 base::Time cache_last_modified
,
299 const base::FilePath
& cache_directory
,
300 const base::FilePath
& index_file_path
,
301 SimpleIndexLoadResult
* out_result
) {
302 // Load the index and find its age.
303 base::Time last_cache_seen_by_index
;
304 SyncLoadFromDisk(index_file_path
, &last_cache_seen_by_index
, out_result
);
306 // Consider the index loaded if it is fresh.
307 const bool index_file_existed
= base::PathExists(index_file_path
);
308 if (!out_result
->did_load
) {
309 if (index_file_existed
)
310 UmaRecordIndexFileState(INDEX_STATE_CORRUPT
, cache_type
);
312 if (cache_last_modified
<= last_cache_seen_by_index
) {
313 base::Time latest_dir_mtime
;
314 simple_util::GetMTime(cache_directory
, &latest_dir_mtime
);
315 if (LegacyIsIndexFileStale(latest_dir_mtime
, index_file_path
)) {
316 UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES
,
319 UmaRecordIndexFileState(INDEX_STATE_FRESH
, cache_type
);
321 UmaRecordIndexInitMethod(INITIALIZE_METHOD_LOADED
, cache_type
);
324 UmaRecordIndexFileState(INDEX_STATE_STALE
, cache_type
);
327 // Reconstruct the index by scanning the disk for entries.
328 const base::TimeTicks start
= base::TimeTicks::Now();
329 SyncRestoreFromDisk(cache_directory
, index_file_path
, out_result
);
330 SIMPLE_CACHE_UMA(MEDIUM_TIMES
, "IndexRestoreTime", cache_type
,
331 base::TimeTicks::Now() - start
);
332 SIMPLE_CACHE_UMA(COUNTS
, "IndexEntriesRestored", cache_type
,
333 out_result
->entries
.size());
334 if (index_file_existed
) {
335 UmaRecordIndexInitMethod(INITIALIZE_METHOD_RECOVERED
, cache_type
);
337 UmaRecordIndexInitMethod(INITIALIZE_METHOD_NEWCACHE
, cache_type
);
338 SIMPLE_CACHE_UMA(COUNTS
,
339 "IndexCreatedEntryCount", cache_type
,
340 out_result
->entries
.size());
345 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath
& index_filename
,
346 base::Time
* out_last_cache_seen_by_index
,
347 SimpleIndexLoadResult
* out_result
) {
350 File
file(index_filename
,
351 File::FLAG_OPEN
| File::FLAG_READ
| File::FLAG_SHARE_DELETE
);
355 base::MemoryMappedFile index_file_map
;
356 if (!index_file_map
.Initialize(file
.Pass())) {
357 simple_util::SimpleCacheDeleteFile(index_filename
);
361 SimpleIndexFile::Deserialize(
362 reinterpret_cast<const char*>(index_file_map
.data()),
363 index_file_map
.length(),
364 out_last_cache_seen_by_index
,
367 if (!out_result
->did_load
)
368 simple_util::SimpleCacheDeleteFile(index_filename
);
372 scoped_ptr
<base::Pickle
> SimpleIndexFile::Serialize(
373 const SimpleIndexFile::IndexMetadata
& index_metadata
,
374 const SimpleIndex::EntrySet
& entries
) {
375 scoped_ptr
<base::Pickle
> pickle(
376 new base::Pickle(sizeof(SimpleIndexFile::PickleHeader
)));
378 index_metadata
.Serialize(pickle
.get());
379 for (SimpleIndex::EntrySet::const_iterator it
= entries
.begin();
380 it
!= entries
.end(); ++it
) {
381 pickle
->WriteUInt64(it
->first
);
382 it
->second
.Serialize(pickle
.get());
384 return pickle
.Pass();
388 void SimpleIndexFile::Deserialize(const char* data
, int data_len
,
389 base::Time
* out_cache_last_modified
,
390 SimpleIndexLoadResult
* out_result
) {
394 SimpleIndex::EntrySet
* entries
= &out_result
->entries
;
396 base::Pickle
pickle(data
, data_len
);
397 if (!pickle
.data()) {
398 LOG(WARNING
) << "Corrupt Simple Index File.";
402 base::PickleIterator
pickle_it(pickle
);
403 SimpleIndexFile::PickleHeader
* header_p
=
404 pickle
.headerT
<SimpleIndexFile::PickleHeader
>();
405 const uint32 crc_read
= header_p
->crc
;
406 const uint32 crc_calculated
= CalculatePickleCRC(pickle
);
408 if (crc_read
!= crc_calculated
) {
409 LOG(WARNING
) << "Invalid CRC in Simple Index file.";
413 SimpleIndexFile::IndexMetadata index_metadata
;
414 if (!index_metadata
.Deserialize(&pickle_it
)) {
415 LOG(ERROR
) << "Invalid index_metadata on Simple Cache Index.";
419 if (!index_metadata
.CheckIndexMetadata()) {
420 LOG(ERROR
) << "Invalid index_metadata on Simple Cache Index.";
425 // TODO(gavinp): Consider using std::unordered_map.
426 entries
->resize(index_metadata
.GetNumberOfEntries() + kExtraSizeForMerge
);
428 while (entries
->size() < index_metadata
.GetNumberOfEntries()) {
430 EntryMetadata entry_metadata
;
431 if (!pickle_it
.ReadUInt64(&hash_key
) ||
432 !entry_metadata
.Deserialize(&pickle_it
)) {
433 LOG(WARNING
) << "Invalid EntryMetadata in Simple Index file.";
437 SimpleIndex::InsertInEntrySet(hash_key
, entry_metadata
, entries
);
440 int64 cache_last_modified
;
441 if (!pickle_it
.ReadInt64(&cache_last_modified
)) {
445 DCHECK(out_cache_last_modified
);
446 *out_cache_last_modified
= base::Time::FromInternalValue(cache_last_modified
);
448 out_result
->did_load
= true;
452 void SimpleIndexFile::SyncRestoreFromDisk(
453 const base::FilePath
& cache_directory
,
454 const base::FilePath
& index_file_path
,
455 SimpleIndexLoadResult
* out_result
) {
456 VLOG(1) << "Simple Cache Index is being restored from disk.";
457 simple_util::SimpleCacheDeleteFile(index_file_path
);
459 SimpleIndex::EntrySet
* entries
= &out_result
->entries
;
461 const bool did_succeed
= TraverseCacheDirectory(
462 cache_directory
, base::Bind(&ProcessEntryFile
, entries
));
464 LOG(ERROR
) << "Could not reconstruct index from disk";
467 out_result
->did_load
= true;
468 // When we restore from disk we write the merged index file to disk right
469 // away, this might save us from having to restore again next time.
470 out_result
->flush_required
= true;
474 bool SimpleIndexFile::LegacyIsIndexFileStale(
475 base::Time cache_last_modified
,
476 const base::FilePath
& index_file_path
) {
477 base::Time index_mtime
;
478 if (!simple_util::GetMTime(index_file_path
, &index_mtime
))
480 return index_mtime
< cache_last_modified
;
483 } // namespace disk_cache