1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/disk_cache/simple/simple_index_file.h"
9 #include "base/files/file.h"
10 #include "base/files/file_util.h"
11 #include "base/files/memory_mapped_file.h"
12 #include "base/hash.h"
13 #include "base/logging.h"
14 #include "base/numerics/safe_conversions.h"
15 #include "base/pickle.h"
16 #include "base/single_thread_task_runner.h"
17 #include "base/task_runner_util.h"
18 #include "base/threading/thread_restrictions.h"
19 #include "net/disk_cache/simple/simple_backend_version.h"
20 #include "net/disk_cache/simple/simple_entry_format.h"
21 #include "net/disk_cache/simple/simple_histogram_macros.h"
22 #include "net/disk_cache/simple/simple_index.h"
23 #include "net/disk_cache/simple/simple_synchronous_entry.h"
24 #include "net/disk_cache/simple/simple_util.h"
25 #include "third_party/zlib/zlib.h"
29 namespace disk_cache
{
// Lengths, in characters, of the two parts of an entry file name: the
// hex-encoded entry hash key that starts the name, and the suffix that
// follows it.
const int kEntryFilesHashLength = 16;
const int kEntryFilesSuffixLength = 2;
35 const uint64 kMaxEntiresInIndex
= 100000000;
37 uint32
CalculatePickleCRC(const base::Pickle
& pickle
) {
38 return crc32(crc32(0, Z_NULL
, 0),
39 reinterpret_cast<const Bytef
*>(pickle
.payload()),
40 pickle
.payload_size());
43 // Used in histograms. Please only add new values at the end.
// These enumerators are the samples UmaRecordIndexFileState() reports to the
// "IndexFileStateOnLoad" histogram.
// NOTE(review): the line that opens this enumeration and the lines that end
// it (including the INDEX_STATE_MAX enumerator that UmaRecordIndexFileState()
// uses as histogram boundary) are not visible in this listing -- confirm
// against the full source before editing.
// Reported when the index could not be loaded although an index file existed.
45 INDEX_STATE_CORRUPT
= 0,
// Reported when the index loaded but the cache was modified after the time
// recorded in the index.
46 INDEX_STATE_STALE
= 1,
// Reported when the index loaded and is not older than the cache.
47 INDEX_STATE_FRESH
= 2,
// Reported instead of INDEX_STATE_FRESH when LegacyIsIndexFileStale() says
// the index file itself is older than the cache directory.
48 INDEX_STATE_FRESH_CONCURRENT_UPDATES
= 3,
52 void UmaRecordIndexFileState(IndexFileState state
, net::CacheType cache_type
) {
53 SIMPLE_CACHE_UMA(ENUMERATION
,
54 "IndexFileStateOnLoad", cache_type
, state
, INDEX_STATE_MAX
);
// How the in-memory index got initialized. Reported to the
// "IndexInitializeMethod" histogram, so existing values must keep their
// numbers; only add new values at the end.
enum IndexInitMethod {
  // The index was rebuilt from the cache directory and an index file existed.
  INITIALIZE_METHOD_RECOVERED = 0,
  // The index file was loaded and used as is.
  INITIALIZE_METHOD_LOADED = 1,
  // The index was rebuilt from the cache directory and no index file existed.
  INITIALIZE_METHOD_NEWCACHE = 2,
  // Histogram boundary; not a real sample.
  INITIALIZE_METHOD_MAX = 3,
};
65 void UmaRecordIndexInitMethod(IndexInitMethod method
,
66 net::CacheType cache_type
) {
67 SIMPLE_CACHE_UMA(ENUMERATION
,
68 "IndexInitializeMethod", cache_type
,
69 method
, INITIALIZE_METHOD_MAX
);
// Writes the serialized bytes of |pickle| (pickle->data(), pickle->size()) to
// |file_name|, starting at offset 0. The file is opened with the create,
// write and share-delete flags.
// NOTE(review): several lines of this function are not visible in this
// listing (the construction of |file| that the flags below belong to, the
// declaration of |bytes_written|, and the return statements) -- consult the
// full source before editing.
72 bool WritePickleFile(base::Pickle
* pickle
, const base::FilePath
& file_name
) {
75 File::FLAG_CREATE
| File::FLAG_WRITE
| File::FLAG_SHARE_DELETE
);
80 file
.Write(0, static_cast<const char*>(pickle
->data()), pickle
->size());
// A write that did not cover the whole pickle is treated as a failure and the
// partially written file is deleted. checked_cast guards the size_t -> int
// conversion needed for the comparison.
81 if (bytes_written
!= base::checked_cast
<int>(pickle
->size())) {
82 simple_util::SimpleCacheDeleteFile(file_name
);
88 // Called for each cache directory traversal iteration.
// Accounts for the on-disk file |file_path| in |entries|. The base name must
// be exactly kEntryFilesHashLength + kEntryFilesSuffixLength characters long
// and start with a hex-encoded entry hash key. The first file seen for a hash
// key inserts a new EntryMetadata (last-used time, file size); every further
// file with the same key adds its size to the existing entry.
// NOTE(review): some lines of this function are not visible in this listing
// (the early-exit statements, the declaration of |hash_key|, the #endif, the
// remaining InsertInEntrySet arguments) -- consult the full source.
89 void ProcessEntryFile(SimpleIndex::EntrySet
* entries
,
90 const base::FilePath
& file_path
) {
91 static const size_t kEntryFilesLength
=
92 kEntryFilesHashLength
+ kEntryFilesSuffixLength
;
93 // Converting to std::string is OK since we never use UTF8 wide chars in our
95 const base::FilePath::StringType base_name
= file_path
.BaseName().value();
96 const std::string
file_name(base_name
.begin(), base_name
.end());
// Files whose name length does not match are not entry files.
97 if (file_name
.size() != kEntryFilesLength
)
// The leading kEntryFilesHashLength characters carry the hash key.
99 const base::StringPiece
hash_string(
100 file_name
.begin(), file_name
.begin() + kEntryFilesHashLength
);
102 if (!simple_util::GetEntryHashKeyFromHexString(hash_string
, &hash_key
)) {
103 LOG(WARNING
) << "Invalid entry hash key filename while restoring index from"
104 << " disk: " << file_name
;
108 File::Info file_info
;
109 if (!base::GetFileInfo(file_path
, &file_info
)) {
110 LOG(ERROR
) << "Could not get file info for " << file_path
.value();
113 base::Time last_used_time
;
114 #if defined(OS_POSIX)
115 // For POSIX systems, a last access time is available. However, it's not
116 // guaranteed to be more accurate than mtime. It is no worse though.
117 last_used_time
= file_info
.last_accessed
;
// Fall back to the modification time when no access time was taken above.
119 if (last_used_time
.is_null())
120 last_used_time
= file_info
.last_modified
;
122 int64 file_size
= file_info
.size
;
123 SimpleIndex::EntrySet::iterator it
= entries
->find(hash_key
);
// First file seen for this hash key: create the entry.
124 if (it
== entries
->end()) {
125 SimpleIndex::InsertInEntrySet(
127 EntryMetadata(last_used_time
, file_size
),
130 // Summing up the total size of the entry through all the *_[0-1] files
131 it
->second
.SetEntrySize(it
->second
.GetEntrySize() + file_size
);
137 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
138 flush_required(false) {
141 SimpleIndexLoadResult::~SimpleIndexLoadResult() {
// Puts this result back into its "no flush required" state.
// NOTE(review): only the |flush_required| assignment is visible in this
// listing; the lines around it are missing and presumably reset the other
// members as well -- verify against the full source.
144 void SimpleIndexLoadResult::Reset() {
146 flush_required
= false;
151 const char SimpleIndexFile::kIndexFileName
[] = "the-real-index";
153 const char SimpleIndexFile::kIndexDirectory
[] = "index-dir";
155 const char SimpleIndexFile::kTempIndexFileName
[] = "temp-index";
// Default metadata: the current magic number and version, and zero entries.
// NOTE(review): the end of the initializer list and the constructor body are
// not visible in this listing (the initializer for the remaining member is
// presumably there) -- verify against the full source.
157 SimpleIndexFile::IndexMetadata::IndexMetadata()
158 : magic_number_(kSimpleIndexMagicNumber
),
159 version_(kSimpleVersion
),
160 number_of_entries_(0),
163 SimpleIndexFile::IndexMetadata::IndexMetadata(
164 uint64 number_of_entries
, uint64 cache_size
)
165 : magic_number_(kSimpleIndexMagicNumber
),
166 version_(kSimpleVersion
),
167 number_of_entries_(number_of_entries
),
168 cache_size_(cache_size
) {}
// Appends the metadata to |pickle| in this order: magic number (uint64),
// version (uint32), number of entries (uint64), cache size (uint64).
// IndexMetadata::Deserialize() reads the fields back in the same order.
// NOTE(review): one line between the signature and the first write is not
// visible in this listing.
170 void SimpleIndexFile::IndexMetadata::Serialize(base::Pickle
* pickle
) const {
172 pickle
->WriteUInt64(magic_number_
);
173 pickle
->WriteUInt32(version_
);
174 pickle
->WriteUInt64(number_of_entries_
);
175 pickle
->WriteUInt64(cache_size_
);
// Completes a serialized index: appends |cache_modified| (as its internal
// int64 value) to |pickle|, then stores the CRC of the pickle payload in the
// pickle header. The CRC is computed after the last write, so it covers the
// appended timestamp too.
// NOTE(review): the return statements (including the one taken when
// WriteInt64() fails) are not visible in this listing.
179 bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified
,
180 base::Pickle
* pickle
) {
181 if (!pickle
->WriteInt64(cache_modified
.ToInternalValue()))
183 SimpleIndexFile::PickleHeader
* header_p
= pickle
->headerT
<PickleHeader
>();
184 header_p
->crc
= CalculatePickleCRC(*pickle
);
// Reads the four metadata fields from |it| in the order
// IndexMetadata::Serialize() writes them: magic number, version, number of
// entries, cache size. Returns false as soon as one read fails; the &&
// chain stops at the first failure, so later fields are then left untouched.
// NOTE(review): one line between the signature and the return statement is
// not visible in this listing.
188 bool SimpleIndexFile::IndexMetadata::Deserialize(base::PickleIterator
* it
) {
190 return it
->ReadUInt64(&magic_number_
) &&
191 it
->ReadUInt32(&version_
) &&
192 it
->ReadUInt64(&number_of_entries_
)&&
193 it
->ReadUInt64(&cache_size_
);
196 void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type
,
197 const base::FilePath
& cache_directory
,
198 const base::FilePath
& index_filename
,
199 const base::FilePath
& temp_index_filename
,
200 scoped_ptr
<base::Pickle
> pickle
,
201 const base::TimeTicks
& start_time
,
202 bool app_on_background
) {
203 DCHECK_EQ(index_filename
.DirName().value(),
204 temp_index_filename
.DirName().value());
205 base::FilePath index_file_directory
= temp_index_filename
.DirName();
206 if (!base::DirectoryExists(index_file_directory
) &&
207 !base::CreateDirectory(index_file_directory
)) {
208 LOG(ERROR
) << "Could not create a directory to hold the index file";
212 // There is a chance that the index containing all the necessary data about
213 // newly created entries will appear to be stale. This can happen if on-disk
214 // part of a Create operation does not fit into the time budget for the index
215 // flush delay. This simple approach will be reconsidered if it does not allow
216 // for maintaining freshness.
217 base::Time cache_dir_mtime
;
218 if (!simple_util::GetMTime(cache_directory
, &cache_dir_mtime
)) {
219 LOG(ERROR
) << "Could obtain information about cache age";
222 SerializeFinalData(cache_dir_mtime
, pickle
.get());
223 if (!WritePickleFile(pickle
.get(), temp_index_filename
)) {
224 LOG(ERROR
) << "Failed to write the temporary index file";
228 // Atomically rename the temporary index file to become the real one.
229 // TODO(gavinp): DCHECK when not shutting down, since that is very strange.
230 // The rename failing during shutdown is legal because it's legal to begin
231 // erasing a cache as soon as the destructor has been called.
232 if (!base::ReplaceFile(temp_index_filename
, index_filename
, NULL
))
235 if (app_on_background
) {
236 SIMPLE_CACHE_UMA(TIMES
,
237 "IndexWriteToDiskTime.Background", cache_type
,
238 (base::TimeTicks::Now() - start_time
));
240 SIMPLE_CACHE_UMA(TIMES
,
241 "IndexWriteToDiskTime.Foreground", cache_type
,
242 (base::TimeTicks::Now() - start_time
));
246 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
247 return number_of_entries_
<= kMaxEntiresInIndex
&&
248 magic_number_
== kSimpleIndexMagicNumber
&&
249 version_
== kSimpleVersion
;
252 SimpleIndexFile::SimpleIndexFile(
253 const scoped_refptr
<base::SingleThreadTaskRunner
>& cache_thread
,
254 const scoped_refptr
<base::TaskRunner
>& worker_pool
,
255 net::CacheType cache_type
,
256 const base::FilePath
& cache_directory
)
257 : cache_thread_(cache_thread
),
258 worker_pool_(worker_pool
),
259 cache_type_(cache_type
),
260 cache_directory_(cache_directory
),
261 index_file_(cache_directory_
.AppendASCII(kIndexDirectory
)
262 .AppendASCII(kIndexFileName
)),
263 temp_index_file_(cache_directory_
.AppendASCII(kIndexDirectory
)
264 .AppendASCII(kTempIndexFileName
)) {
267 SimpleIndexFile::~SimpleIndexFile() {}
// Starts an asynchronous index load: binds SyncLoadIndexEntries() with the
// cache directory, the index file path and |out_result|, posts it to
// |worker_pool_| and passes |callback| as the reply. The outcome is written
// to |out_result|.
// NOTE(review): one line of bound arguments is not visible in this listing.
269 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified
,
270 const base::Closure
& callback
,
271 SimpleIndexLoadResult
* out_result
) {
272 base::Closure task
= base::Bind(&SimpleIndexFile::SyncLoadIndexEntries
,
274 cache_last_modified
, cache_directory_
,
275 index_file_
, out_result
);
276 worker_pool_
->PostTaskAndReply(FROM_HERE
, task
, callback
);
// Serializes |entry_set| together with its metadata (entry count and
// |cache_size|) right away, then posts SyncWriteToDisk() to |cache_thread_|
// to do the file I/O. Ownership of the pickle moves into the posted task via
// base::Passed. When |callback| is non-null it is passed as the reply to the
// posted task; otherwise the task is posted without a reply.
// NOTE(review): a parameter line of the signature and the declaration that
// the base::Bind() expression initializes (|task|) are not visible in this
// listing, nor is the else between the two Post calls.
279 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet
& entry_set
,
281 const base::TimeTicks
& start
,
282 bool app_on_background
,
283 const base::Closure
& callback
) {
284 IndexMetadata
index_metadata(entry_set
.size(), cache_size
);
285 scoped_ptr
<base::Pickle
> pickle
= Serialize(index_metadata
, entry_set
);
287 base::Bind(&SimpleIndexFile::SyncWriteToDisk
,
288 cache_type_
, cache_directory_
, index_file_
, temp_index_file_
,
289 base::Passed(&pickle
), start
, app_on_background
);
290 if (callback
.is_null())
291 cache_thread_
->PostTask(FROM_HERE
, task
);
293 cache_thread_
->PostTaskAndReply(FROM_HERE
, task
, callback
);
// Fills |out_result| with the index for |cache_directory|. The index file at
// |index_file_path| is loaded first; it is used when it loaded successfully
// and |cache_last_modified| is not later than the cache mtime recorded in the
// index. Otherwise the index is rebuilt with SyncRestoreFromDisk(). The index
// state, the initialization method, and the restore time and entry counts are
// reported to histograms for |cache_type|.
// NOTE(review): the else branches, closing braces and an early return of this
// function are not visible in this listing -- consult the full source before
// changing the control flow.
297 void SimpleIndexFile::SyncLoadIndexEntries(
298 net::CacheType cache_type
,
299 base::Time cache_last_modified
,
300 const base::FilePath
& cache_directory
,
301 const base::FilePath
& index_file_path
,
302 SimpleIndexLoadResult
* out_result
) {
303 // Load the index and find its age.
304 base::Time last_cache_seen_by_index
;
305 SyncLoadFromDisk(index_file_path
, &last_cache_seen_by_index
, out_result
);
307 // Consider the index loaded if it is fresh.
// NOTE(review): this existence check runs after SyncLoadFromDisk(), which
// deletes the index file when it cannot be mapped or deserialized. For a
// corrupt index the file is therefore likely gone by now, in which case
// INDEX_STATE_CORRUPT and INITIALIZE_METHOD_RECOVERED would not be recorded
// for it. Consider sampling PathExists() before the load.
308 const bool index_file_existed
= base::PathExists(index_file_path
);
309 if (!out_result
->did_load
) {
310 if (index_file_existed
)
311 UmaRecordIndexFileState(INDEX_STATE_CORRUPT
, cache_type
);
// The index is fresh when the cache was not modified after the time the
// index recorded.
313 if (cache_last_modified
<= last_cache_seen_by_index
) {
314 base::Time latest_dir_mtime
;
// The result of GetMTime() is not checked here.
315 simple_util::GetMTime(cache_directory
, &latest_dir_mtime
);
316 if (LegacyIsIndexFileStale(latest_dir_mtime
, index_file_path
)) {
317 UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES
,
320 UmaRecordIndexFileState(INDEX_STATE_FRESH
, cache_type
);
322 UmaRecordIndexInitMethod(INITIALIZE_METHOD_LOADED
, cache_type
);
325 UmaRecordIndexFileState(INDEX_STATE_STALE
, cache_type
);
328 // Reconstruct the index by scanning the disk for entries.
329 const base::TimeTicks start
= base::TimeTicks::Now();
330 SyncRestoreFromDisk(cache_directory
, index_file_path
, out_result
);
331 SIMPLE_CACHE_UMA(MEDIUM_TIMES
, "IndexRestoreTime", cache_type
,
332 base::TimeTicks::Now() - start
);
333 SIMPLE_CACHE_UMA(COUNTS
, "IndexEntriesRestored", cache_type
,
334 out_result
->entries
.size());
// An index file that existed counts as a recovery; none counts as a new cache.
335 if (index_file_existed
) {
336 UmaRecordIndexInitMethod(INITIALIZE_METHOD_RECOVERED
, cache_type
);
338 UmaRecordIndexInitMethod(INITIALIZE_METHOD_NEWCACHE
, cache_type
);
339 SIMPLE_CACHE_UMA(COUNTS
,
340 "IndexCreatedEntryCount", cache_type
,
341 out_result
->entries
.size());
// Opens |index_filename| for reading, memory-maps it and hands the mapped
// bytes to Deserialize(), which fills |out_result| and
// |out_last_cache_seen_by_index|. The index file is deleted when it cannot be
// mapped and when deserialization leaves |out_result->did_load| false.
// NOTE(review): some lines of this function are not visible in this listing
// (statements before the file is opened, the handling of an invalid file,
// early returns, and the last Deserialize() argument).
346 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath
& index_filename
,
347 base::Time
* out_last_cache_seen_by_index
,
348 SimpleIndexLoadResult
* out_result
) {
351 File
file(index_filename
,
352 File::FLAG_OPEN
| File::FLAG_READ
| File::FLAG_SHARE_DELETE
);
356 base::MemoryMappedFile index_file_map
;
// Ownership of |file| is handed to the mapping.
357 if (!index_file_map
.Initialize(file
.Pass())) {
358 simple_util::SimpleCacheDeleteFile(index_filename
);
362 SimpleIndexFile::Deserialize(
363 reinterpret_cast<const char*>(index_file_map
.data()),
364 index_file_map
.length(),
365 out_last_cache_seen_by_index
,
// An index that failed to deserialize is removed from disk.
368 if (!out_result
->did_load
)
369 simple_util::SimpleCacheDeleteFile(index_filename
);
373 scoped_ptr
<base::Pickle
> SimpleIndexFile::Serialize(
374 const SimpleIndexFile::IndexMetadata
& index_metadata
,
375 const SimpleIndex::EntrySet
& entries
) {
376 scoped_ptr
<base::Pickle
> pickle(
377 new base::Pickle(sizeof(SimpleIndexFile::PickleHeader
)));
379 index_metadata
.Serialize(pickle
.get());
380 for (SimpleIndex::EntrySet::const_iterator it
= entries
.begin();
381 it
!= entries
.end(); ++it
) {
382 pickle
->WriteUInt64(it
->first
);
383 it
->second
.Serialize(pickle
.get());
385 return pickle
.Pass();
// Parses the |data_len| bytes at |data| as a serialized index and fills
// |out_result->entries|. Checks performed, in order: the pickle must be
// valid, the CRC stored in the header must equal the CRC of the payload, the
// IndexMetadata must deserialize and pass CheckIndexMetadata(), every entry
// record (uint64 hash key + EntryMetadata) must deserialize, and a trailing
// int64 cache modification time must be present. On success
// |*out_cache_last_modified| receives that time and |out_result->did_load|
// is set to true.
// NOTE(review): the failure-path statements, a few declarations (for example
// |hash_key|) and closing braces are not visible in this listing -- consult
// the full source before changing the error handling.
389 void SimpleIndexFile::Deserialize(const char* data
, int data_len
,
390 base::Time
* out_cache_last_modified
,
391 SimpleIndexLoadResult
* out_result
) {
395 SimpleIndex::EntrySet
* entries
= &out_result
->entries
;
397 base::Pickle
pickle(data
, data_len
);
398 if (!pickle
.data()) {
399 LOG(WARNING
) << "Corrupt Simple Index File.";
403 base::PickleIterator
pickle_it(pickle
);
// Compare the CRC stored in the header with one computed over the payload.
404 SimpleIndexFile::PickleHeader
* header_p
=
405 pickle
.headerT
<SimpleIndexFile::PickleHeader
>();
406 const uint32 crc_read
= header_p
->crc
;
407 const uint32 crc_calculated
= CalculatePickleCRC(pickle
);
409 if (crc_read
!= crc_calculated
) {
410 LOG(WARNING
) << "Invalid CRC in Simple Index file.";
414 SimpleIndexFile::IndexMetadata index_metadata
;
415 if (!index_metadata
.Deserialize(&pickle_it
)) {
416 LOG(ERROR
) << "Invalid index_metadata on Simple Cache Index.";
420 if (!index_metadata
.CheckIndexMetadata()) {
421 LOG(ERROR
) << "Invalid index_metadata on Simple Cache Index.";
426 // TODO(gavinp): Consider using std::unordered_map.
427 entries
->resize(index_metadata
.GetNumberOfEntries() + kExtraSizeForMerge
);
// Read entry records until the announced number of entries is reached.
429 while (entries
->size() < index_metadata
.GetNumberOfEntries()) {
431 EntryMetadata entry_metadata
;
432 if (!pickle_it
.ReadUInt64(&hash_key
) ||
433 !entry_metadata
.Deserialize(&pickle_it
)) {
434 LOG(WARNING
) << "Invalid EntryMetadata in Simple Index file.";
438 SimpleIndex::InsertInEntrySet(hash_key
, entry_metadata
, entries
);
// The cache modification time follows the entry records.
441 int64 cache_last_modified
;
442 if (!pickle_it
.ReadInt64(&cache_last_modified
)) {
446 DCHECK(out_cache_last_modified
);
447 *out_cache_last_modified
= base::Time::FromInternalValue(cache_last_modified
);
449 out_result
->did_load
= true;
// Rebuilds the index in |out_result| from the files in |cache_directory|.
// The index file at |index_file_path| is deleted first; then the cache
// directory is traversed with ProcessEntryFile() bound to
// |out_result->entries|. When the rebuild completes, the result is marked as
// loaded and a flush is requested.
// NOTE(review): some lines of this function are not visible in this listing
// (a statement before |entries| is taken, the condition guarding the error
// log, and what follows that log) -- consult the full source.
453 void SimpleIndexFile::SyncRestoreFromDisk(
454 const base::FilePath
& cache_directory
,
455 const base::FilePath
& index_file_path
,
456 SimpleIndexLoadResult
* out_result
) {
457 VLOG(1) << "Simple Cache Index is being restored from disk.";
458 simple_util::SimpleCacheDeleteFile(index_file_path
);
460 SimpleIndex::EntrySet
* entries
= &out_result
->entries
;
462 const bool did_succeed
= TraverseCacheDirectory(
463 cache_directory
, base::Bind(&ProcessEntryFile
, entries
));
465 LOG(ERROR
) << "Could not reconstruct index from disk";
468 out_result
->did_load
= true;
469 // When we restore from disk we write the merged index file to disk right
470 // away, this might save us from having to restore again next time.
471 out_result
->flush_required
= true;
// Compares the modification time of the index file at |index_file_path| with
// |cache_last_modified|: returns true when the index file was modified
// before |cache_last_modified|.
// NOTE(review): the statement executed when GetMTime() fails is not visible
// in this listing -- check the full source for the value returned then.
475 bool SimpleIndexFile::LegacyIsIndexFileStale(
476 base::Time cache_last_modified
,
477 const base::FilePath
& index_file_path
) {
478 base::Time index_mtime
;
479 if (!simple_util::GetMTime(index_file_path
, &index_mtime
))
481 return index_mtime
< cache_last_modified
;
484 } // namespace disk_cache