1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/disk_cache/simple/simple_index_file.h"
9 #include "base/file_util.h"
10 #include "base/files/memory_mapped_file.h"
11 #include "base/hash.h"
12 #include "base/logging.h"
13 #include "base/pickle.h"
14 #include "base/single_thread_task_runner.h"
15 #include "base/task_runner_util.h"
16 #include "base/threading/thread_restrictions.h"
17 #include "net/disk_cache/simple/simple_backend_version.h"
18 #include "net/disk_cache/simple/simple_entry_format.h"
19 #include "net/disk_cache/simple/simple_histogram_macros.h"
20 #include "net/disk_cache/simple/simple_index.h"
21 #include "net/disk_cache/simple/simple_synchronous_entry.h"
22 #include "net/disk_cache/simple/simple_util.h"
23 #include "third_party/zlib/zlib.h"
25 namespace disk_cache
{
// Number of leading characters of an entry file name that hold the hex-encoded
// entry hash key.
28 const int kEntryFilesHashLength
= 16;
// Number of characters that follow the hash in an entry file name.
29 const int kEntryFilesSuffixLength
= 2;
// Largest entry count IndexMetadata::CheckIndexMetadata() accepts; an index
// that advertises more entries is rejected as invalid.
31 const uint64 kMaxEntiresInIndex
= 100000000;
33 uint32
CalculatePickleCRC(const Pickle
& pickle
) {
34 return crc32(crc32(0, Z_NULL
, 0),
35 reinterpret_cast<const Bytef
*>(pickle
.payload()),
36 pickle
.payload_size());
// State of the on-disk index as determined by
// SimpleIndexFile::SyncLoadIndexEntries() and reported through
// UmaRecordIndexFileState().
39 // Used in histograms. Please only add new values at the end.
// An index file existed but could not be loaded.
41 INDEX_STATE_CORRUPT
= 0,
// The index loaded, but the cache was modified after the time recorded in it.
42 INDEX_STATE_STALE
= 1,
// The index loaded and is not older than the cache's last modification.
43 INDEX_STATE_FRESH
= 2,
// As INDEX_STATE_FRESH, but LegacyIsIndexFileStale() reports the index file as
// older than the cache directory's current mtime.
44 INDEX_STATE_FRESH_CONCURRENT_UPDATES
= 3,
48 void UmaRecordIndexFileState(IndexFileState state
, net::CacheType cache_type
) {
49 SIMPLE_CACHE_UMA(ENUMERATION
,
50 "IndexFileStateOnLoad", cache_type
, state
, INDEX_STATE_MAX
);
// How the index was obtained in SimpleIndexFile::SyncLoadIndexEntries();
// reported through UmaRecordIndexInitMethod().
53 // Used in histograms. Please only add new values at the end.
54 enum IndexInitMethod
{
// Rebuilt by scanning the cache directory although an index file existed.
55 INITIALIZE_METHOD_RECOVERED
= 0,
// Taken from an index file that loaded and was fresh.
56 INITIALIZE_METHOD_LOADED
= 1,
// Rebuilt by scanning the cache directory when no index file existed.
57 INITIALIZE_METHOD_NEWCACHE
= 2,
// Passed to the histogram macro as the enumeration's upper boundary.
58 INITIALIZE_METHOD_MAX
= 3,
61 void UmaRecordIndexInitMethod(IndexInitMethod method
,
62 net::CacheType cache_type
) {
63 SIMPLE_CACHE_UMA(ENUMERATION
,
64 "IndexInitializeMethod", cache_type
,
65 method
, INITIALIZE_METHOD_MAX
);
68 bool WritePickleFile(Pickle
* pickle
, const base::FilePath
& file_name
) {
69 int bytes_written
= base::WriteFile(
70 file_name
, static_cast<const char*>(pickle
->data()), pickle
->size());
71 if (bytes_written
!= implicit_cast
<int>(pickle
->size())) {
72 base::DeleteFile(file_name
, /* recursive = */ false);
78 // Called for each cache directory traversal iteration.
79 void ProcessEntryFile(SimpleIndex::EntrySet
* entries
,
80 const base::FilePath
& file_path
) {
81 static const size_t kEntryFilesLength
=
82 kEntryFilesHashLength
+ kEntryFilesSuffixLength
;
83 // Converting to std::string is OK since we never use UTF8 wide chars in our
85 const base::FilePath::StringType base_name
= file_path
.BaseName().value();
86 const std::string
file_name(base_name
.begin(), base_name
.end());
87 if (file_name
.size() != kEntryFilesLength
)
89 const base::StringPiece
hash_string(
90 file_name
.begin(), file_name
.begin() + kEntryFilesHashLength
);
92 if (!simple_util::GetEntryHashKeyFromHexString(hash_string
, &hash_key
)) {
93 LOG(WARNING
) << "Invalid entry hash key filename while restoring index from"
94 << " disk: " << file_name
;
98 base::File::Info file_info
;
99 if (!base::GetFileInfo(file_path
, &file_info
)) {
100 LOG(ERROR
) << "Could not get file info for " << file_path
.value();
103 base::Time last_used_time
;
104 #if defined(OS_POSIX)
105 // For POSIX systems, a last access time is available. However, it's not
106 // guaranteed to be more accurate than mtime. It is no worse though.
107 last_used_time
= file_info
.last_accessed
;
109 if (last_used_time
.is_null())
110 last_used_time
= file_info
.last_modified
;
112 int64 file_size
= file_info
.size
;
113 SimpleIndex::EntrySet::iterator it
= entries
->find(hash_key
);
114 if (it
== entries
->end()) {
115 SimpleIndex::InsertInEntrySet(
117 EntryMetadata(last_used_time
, file_size
),
120 // Summing up the total size of the entry through all the *_[0-1] files
121 it
->second
.SetEntrySize(it
->second
.GetEntrySize() + file_size
);
// Constructs a result that reports neither a successful load nor a required
// flush.
127 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
128 flush_required(false) {
131 SimpleIndexLoadResult::~SimpleIndexLoadResult() {
// Resets the result for reuse. The part of the body visible here clears
// |flush_required|.
134 void SimpleIndexLoadResult::Reset() {
136 flush_required
= false;
// File name of the index inside the index directory.
141 const char SimpleIndexFile::kIndexFileName
[] = "the-real-index";
// Name of the sub-directory of the cache directory that holds the index files.
143 const char SimpleIndexFile::kIndexDirectory
[] = "index-dir";
// File name of the temporary index that is written first and then renamed over
// the real index.
145 const char SimpleIndexFile::kTempIndexFileName
[] = "temp-index";
147 SimpleIndexFile::IndexMetadata::IndexMetadata()
148 : magic_number_(kSimpleIndexMagicNumber
),
149 version_(kSimpleVersion
),
150 number_of_entries_(0),
153 SimpleIndexFile::IndexMetadata::IndexMetadata(
154 uint64 number_of_entries
, uint64 cache_size
)
155 : magic_number_(kSimpleIndexMagicNumber
),
156 version_(kSimpleVersion
),
157 number_of_entries_(number_of_entries
),
158 cache_size_(cache_size
) {}
160 void SimpleIndexFile::IndexMetadata::Serialize(Pickle
* pickle
) const {
162 pickle
->WriteUInt64(magic_number_
);
163 pickle
->WriteUInt32(version_
);
164 pickle
->WriteUInt64(number_of_entries_
);
165 pickle
->WriteUInt64(cache_size_
);
169 bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified
,
171 if (!pickle
->WriteInt64(cache_modified
.ToInternalValue()))
173 SimpleIndexFile::PickleHeader
* header_p
= pickle
->headerT
<PickleHeader
>();
174 header_p
->crc
= CalculatePickleCRC(*pickle
);
178 bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator
* it
) {
180 return it
->ReadUInt64(&magic_number_
) &&
181 it
->ReadUInt32(&version_
) &&
182 it
->ReadUInt64(&number_of_entries_
)&&
183 it
->ReadUInt64(&cache_size_
);
186 void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type
,
187 const base::FilePath
& cache_directory
,
188 const base::FilePath
& index_filename
,
189 const base::FilePath
& temp_index_filename
,
190 scoped_ptr
<Pickle
> pickle
,
191 const base::TimeTicks
& start_time
,
192 bool app_on_background
) {
193 // There is a chance that the index containing all the necessary data about
194 // newly created entries will appear to be stale. This can happen if on-disk
195 // part of a Create operation does not fit into the time budget for the index
196 // flush delay. This simple approach will be reconsidered if it does not allow
197 // for maintaining freshness.
198 base::Time cache_dir_mtime
;
199 if (!simple_util::GetMTime(cache_directory
, &cache_dir_mtime
)) {
200 LOG(ERROR
) << "Could obtain information about cache age";
203 SerializeFinalData(cache_dir_mtime
, pickle
.get());
204 if (!WritePickleFile(pickle
.get(), temp_index_filename
)) {
205 if (!base::CreateDirectory(temp_index_filename
.DirName())) {
206 LOG(ERROR
) << "Could not create a directory to hold the index file";
209 if (!WritePickleFile(pickle
.get(), temp_index_filename
)) {
210 LOG(ERROR
) << "Failed to write the temporary index file";
215 // Atomically rename the temporary index file to become the real one.
216 bool result
= base::ReplaceFile(temp_index_filename
, index_filename
, NULL
);
219 if (app_on_background
) {
220 SIMPLE_CACHE_UMA(TIMES
,
221 "IndexWriteToDiskTime.Background", cache_type
,
222 (base::TimeTicks::Now() - start_time
));
224 SIMPLE_CACHE_UMA(TIMES
,
225 "IndexWriteToDiskTime.Foreground", cache_type
,
226 (base::TimeTicks::Now() - start_time
));
230 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
231 return number_of_entries_
<= kMaxEntiresInIndex
&&
232 magic_number_
== kSimpleIndexMagicNumber
&&
233 version_
== kSimpleVersion
;
236 SimpleIndexFile::SimpleIndexFile(
237 base::SingleThreadTaskRunner
* cache_thread
,
238 base::TaskRunner
* worker_pool
,
239 net::CacheType cache_type
,
240 const base::FilePath
& cache_directory
)
241 : cache_thread_(cache_thread
),
242 worker_pool_(worker_pool
),
243 cache_type_(cache_type
),
244 cache_directory_(cache_directory
),
245 index_file_(cache_directory_
.AppendASCII(kIndexDirectory
)
246 .AppendASCII(kIndexFileName
)),
247 temp_index_file_(cache_directory_
.AppendASCII(kIndexDirectory
)
248 .AppendASCII(kTempIndexFileName
)) {
251 SimpleIndexFile::~SimpleIndexFile() {}
253 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified
,
254 const base::Closure
& callback
,
255 SimpleIndexLoadResult
* out_result
) {
256 base::Closure task
= base::Bind(&SimpleIndexFile::SyncLoadIndexEntries
,
258 cache_last_modified
, cache_directory_
,
259 index_file_
, out_result
);
260 worker_pool_
->PostTaskAndReply(FROM_HERE
, task
, callback
);
263 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet
& entry_set
,
265 const base::TimeTicks
& start
,
266 bool app_on_background
) {
267 IndexMetadata
index_metadata(entry_set
.size(), cache_size
);
268 scoped_ptr
<Pickle
> pickle
= Serialize(index_metadata
, entry_set
);
269 cache_thread_
->PostTask(FROM_HERE
, base::Bind(
270 &SimpleIndexFile::SyncWriteToDisk
,
275 base::Passed(&pickle
),
276 base::TimeTicks::Now(),
281 void SimpleIndexFile::SyncLoadIndexEntries(
282 net::CacheType cache_type
,
283 base::Time cache_last_modified
,
284 const base::FilePath
& cache_directory
,
285 const base::FilePath
& index_file_path
,
286 SimpleIndexLoadResult
* out_result
) {
287 // Load the index and find its age.
288 base::Time last_cache_seen_by_index
;
289 SyncLoadFromDisk(index_file_path
, &last_cache_seen_by_index
, out_result
);
291 // Consider the index loaded if it is fresh.
292 const bool index_file_existed
= base::PathExists(index_file_path
);
293 if (!out_result
->did_load
) {
294 if (index_file_existed
)
295 UmaRecordIndexFileState(INDEX_STATE_CORRUPT
, cache_type
);
297 if (cache_last_modified
<= last_cache_seen_by_index
) {
298 base::Time latest_dir_mtime
;
299 simple_util::GetMTime(cache_directory
, &latest_dir_mtime
);
300 if (LegacyIsIndexFileStale(latest_dir_mtime
, index_file_path
)) {
301 UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES
,
304 UmaRecordIndexFileState(INDEX_STATE_FRESH
, cache_type
);
306 UmaRecordIndexInitMethod(INITIALIZE_METHOD_LOADED
, cache_type
);
309 UmaRecordIndexFileState(INDEX_STATE_STALE
, cache_type
);
312 // Reconstruct the index by scanning the disk for entries.
313 const base::TimeTicks start
= base::TimeTicks::Now();
314 SyncRestoreFromDisk(cache_directory
, index_file_path
, out_result
);
315 SIMPLE_CACHE_UMA(MEDIUM_TIMES
, "IndexRestoreTime", cache_type
,
316 base::TimeTicks::Now() - start
);
317 SIMPLE_CACHE_UMA(COUNTS
, "IndexEntriesRestored", cache_type
,
318 out_result
->entries
.size());
319 if (index_file_existed
) {
320 UmaRecordIndexInitMethod(INITIALIZE_METHOD_RECOVERED
, cache_type
);
322 UmaRecordIndexInitMethod(INITIALIZE_METHOD_NEWCACHE
, cache_type
);
323 SIMPLE_CACHE_UMA(COUNTS
,
324 "IndexCreatedEntryCount", cache_type
,
325 out_result
->entries
.size());
// Memory-maps |index_filename| and hands the mapped bytes to Deserialize(),
// which fills |out_result| and |out_last_cache_seen_by_index|.
//
// The index file is deleted when it cannot be mapped, and also when
// |out_result->did_load| is still false after deserialization.
330 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath
& index_filename
,
331 base::Time
* out_last_cache_seen_by_index
,
332 SimpleIndexLoadResult
* out_result
) {
335 base::MemoryMappedFile index_file_map
;
336 if (!index_file_map
.Initialize(index_filename
)) {
337 LOG(WARNING
) << "Could not map Simple Index file.";
338 base::DeleteFile(index_filename
, false);
342 SimpleIndexFile::Deserialize(
343 reinterpret_cast<const char*>(index_file_map
.data()),
344 index_file_map
.length(),
345 out_last_cache_seen_by_index
,
// An index that failed to deserialize is removed from disk.
348 if (!out_result
->did_load
)
349 base::DeleteFile(index_filename
, false);
353 scoped_ptr
<Pickle
> SimpleIndexFile::Serialize(
354 const SimpleIndexFile::IndexMetadata
& index_metadata
,
355 const SimpleIndex::EntrySet
& entries
) {
356 scoped_ptr
<Pickle
> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader
)));
358 index_metadata
.Serialize(pickle
.get());
359 for (SimpleIndex::EntrySet::const_iterator it
= entries
.begin();
360 it
!= entries
.end(); ++it
) {
361 pickle
->WriteUInt64(it
->first
);
362 it
->second
.Serialize(pickle
.get());
364 return pickle
.Pass();
// Parses the serialized index in |data| (|data_len| bytes) into
// |out_result->entries|.
//
// Checks are made in this order: a pickle must be constructible from the
// buffer, the CRC stored in the pickle header must equal the CRC computed over
// the payload, and the index metadata must both deserialize and pass
// CheckIndexMetadata(). Entries are then read as (hash key, EntryMetadata)
// records until the entry set holds the advertised number of entries. Last
// comes the cache modification time, which is returned through
// |out_cache_last_modified|. |out_result->did_load| is set to true only at the
// very end, after every step has succeeded.
368 void SimpleIndexFile::Deserialize(const char* data
, int data_len
,
369 base::Time
* out_cache_last_modified
,
370 SimpleIndexLoadResult
* out_result
) {
374 SimpleIndex::EntrySet
* entries
= &out_result
->entries
;
// Pickle reports an unusable buffer through a null data() pointer.
376 Pickle
pickle(data
, data_len
);
377 if (!pickle
.data()) {
378 LOG(WARNING
) << "Corrupt Simple Index File.";
// Compare the CRC recorded in the header with one recomputed from the payload.
382 PickleIterator
pickle_it(pickle
);
383 SimpleIndexFile::PickleHeader
* header_p
=
384 pickle
.headerT
<SimpleIndexFile::PickleHeader
>();
385 const uint32 crc_read
= header_p
->crc
;
386 const uint32 crc_calculated
= CalculatePickleCRC(pickle
);
388 if (crc_read
!= crc_calculated
) {
389 LOG(WARNING
) << "Invalid CRC in Simple Index file.";
// Read the metadata, then verify magic number, version and entry count. Both
// failures log the same message.
393 SimpleIndexFile::IndexMetadata index_metadata
;
394 if (!index_metadata
.Deserialize(&pickle_it
)) {
395 LOG(ERROR
) << "Invalid index_metadata on Simple Cache Index.";
399 if (!index_metadata
.CheckIndexMetadata()) {
400 LOG(ERROR
) << "Invalid index_metadata on Simple Cache Index.";
405 // TODO(gavinp): Consider using std::unordered_map.
406 entries
->resize(index_metadata
.GetNumberOfEntries() + kExtraSizeForMerge
);
// Read records until the set reaches the advertised entry count.
408 while (entries
->size() < index_metadata
.GetNumberOfEntries()) {
410 EntryMetadata entry_metadata
;
411 if (!pickle_it
.ReadUInt64(&hash_key
) ||
412 !entry_metadata
.Deserialize(&pickle_it
)) {
413 LOG(WARNING
) << "Invalid EntryMetadata in Simple Index file.";
417 SimpleIndex::InsertInEntrySet(hash_key
, entry_metadata
, entries
);
// The cache modification time is read after the entry records.
420 int64 cache_last_modified
;
421 if (!pickle_it
.ReadInt64(&cache_last_modified
)) {
425 DCHECK(out_cache_last_modified
);
426 *out_cache_last_modified
= base::Time::FromInternalValue(cache_last_modified
);
428 out_result
->did_load
= true;
// Rebuilds the index by traversing |cache_directory|.
//
// The index file at |index_file_path| is deleted first. Every traversed file is
// then passed to ProcessEntryFile(), which accumulates into
// |out_result->entries|. In the code visible here the result is finally marked
// as loaded with a flush requested.
// NOTE(review): the error log below presumably applies only when |did_succeed|
// is false - confirm.
432 void SimpleIndexFile::SyncRestoreFromDisk(
433 const base::FilePath
& cache_directory
,
434 const base::FilePath
& index_file_path
,
435 SimpleIndexLoadResult
* out_result
) {
436 VLOG(1) << "Simple Cache Index is being restored from disk.";
437 base::DeleteFile(index_file_path
, /* recursive = */ false);
439 SimpleIndex::EntrySet
* entries
= &out_result
->entries
;
441 const bool did_succeed
= TraverseCacheDirectory(
442 cache_directory
, base::Bind(&ProcessEntryFile
, entries
));
444 LOG(ERROR
) << "Could not reconstruct index from disk";
447 out_result
->did_load
= true;
448 // When we restore from disk we write the merged index file to disk right
449 // away, this might save us from having to restore again next time.
450 out_result
->flush_required
= true;
// Reports whether the index file is older than the cache: returns true when the
// mtime of |index_file_path| is earlier than |cache_last_modified|.
// NOTE(review): the value returned when the index file's mtime cannot be read
// is not visible here - confirm.
454 bool SimpleIndexFile::LegacyIsIndexFileStale(
455 base::Time cache_last_modified
,
456 const base::FilePath
& index_file_path
) {
457 base::Time index_mtime
;
458 if (!simple_util::GetMTime(index_file_path
, &index_mtime
))
460 return index_mtime
< cache_last_modified
;
463 } // namespace disk_cache