1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/disk_cache/simple/simple_index_file.h"
9 #include "base/file_util.h"
10 #include "base/files/file_enumerator.h"
11 #include "base/files/memory_mapped_file.h"
12 #include "base/hash.h"
13 #include "base/logging.h"
14 #include "base/metrics/histogram.h"
15 #include "base/pickle.h"
16 #include "base/single_thread_task_runner.h"
17 #include "base/task_runner_util.h"
18 #include "base/threading/thread_restrictions.h"
19 #include "net/disk_cache/simple/simple_entry_format.h"
20 #include "net/disk_cache/simple/simple_index.h"
21 #include "net/disk_cache/simple/simple_synchronous_entry.h"
22 #include "net/disk_cache/simple/simple_util.h"
23 #include "third_party/zlib/zlib.h"
// Largest number_of_entries_ value that IndexMetadata::CheckIndexMetadata()
// accepts.
// NOTE(review): "Entires" looks like a typo for "Entries"; the identifier is
// left unchanged because it is referenced elsewhere in this file.
28 const uint64 kMaxEntiresInIndex
= 100000000;
// Returns the zlib crc32() checksum of |pickle|'s payload: payload_size()
// bytes starting at payload(), seeded with the value of crc32(0, Z_NULL, 0).
30 uint32
CalculatePickleCRC(const Pickle
& pickle
) {
31 return crc32(crc32(0, Z_NULL
, 0),
32 reinterpret_cast<const Bytef
*>(pickle
.payload()),
33 pickle
.payload_size());
// Reply adapter bound by SimpleIndexFile::DoomEntrySet(): runs
// |reply_callback| with |result|.
36 void DoomEntrySetReply(const net::CompletionCallback
& reply_callback
,
38 reply_callback
.Run(result
);
// Writes the serialized index held in |pickle| to disk and reports how long
// the write took.
//
// The pickle bytes are handed to file_util::WriteFile(). When fewer bytes than
// pickle->size() were written, an error is logged and |temp_index_filename| is
// deleted. base::ReplaceFile() moves |temp_index_filename| over
// |index_filename|. Finally the time elapsed since |start_time| is recorded in
// the ".Background" or ".Foreground" variant of
// SimpleCache.IndexWriteToDiskTime; |app_on_background| is tested to choose.
//
// NOTE(review): several lines of this function are not visible in this view,
// so the exact branch structure joining these steps should be confirmed
// against the full file.
41 void WriteToDiskInternal(const base::FilePath
& index_filename
,
42 const base::FilePath
& temp_index_filename
,
43 scoped_ptr
<Pickle
> pickle
,
44 const base::TimeTicks
& start_time
,
45 bool app_on_background
) {
46 int bytes_written
= file_util::WriteFile(
48 reinterpret_cast<const char*>(pickle
->data()),
// The same size comparison is repeated in the if below, which performs the
// actual error handling.
50 DCHECK_EQ(bytes_written
, implicit_cast
<int>(pickle
->size()));
51 if (bytes_written
!= static_cast<int>(pickle
->size())) {
52 // TODO(felipeg): Add better error handling.
53 LOG(ERROR
) << "Could not write Simple Cache index to temporary file: "
54 << temp_index_filename
.value();
// Remove the incomplete temporary file.
55 base::DeleteFile(temp_index_filename
, /* recursive = */ false);
57 // Swap temp and index_file.
58 bool result
= base::ReplaceFile(temp_index_filename
, index_filename
, NULL
);
// Both histograms measure the same interval, from |start_time| to now.
61 if (app_on_background
) {
62 UMA_HISTOGRAM_TIMES("SimpleCache.IndexWriteToDiskTime.Background",
63 (base::TimeTicks::Now() - start_time
));
65 UMA_HISTOGRAM_TIMES("SimpleCache.IndexWriteToDiskTime.Foreground",
66 (base::TimeTicks::Now() - start_time
));
72 namespace disk_cache
{
// Constructs a load result with did_load and flush_required both false.
74 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
75 flush_required(false) {
// Out-of-line destructor definition.
78 SimpleIndexLoadResult::~SimpleIndexLoadResult() {
// Resets this result; the visible statement sets flush_required to false.
// NOTE(review): other statements of the body are not visible in this view —
// confirm against the full file which other members are reset.
81 void SimpleIndexLoadResult::Reset() {
83 flush_required
= false;
// Name of the index file; the SimpleIndexFile constructor appends it to the
// cache directory to form index_file_.
88 const char SimpleIndexFile::kIndexFileName
[] = "the-real-index";
// Name of the temporary index file; the SimpleIndexFile constructor appends it
// to the cache directory to form temp_index_file_.
90 const char SimpleIndexFile::kTempIndexFileName
[] = "temp-index";
// Default constructor: stamps the metadata with kSimpleIndexMagicNumber and
// kSimpleVersion and starts with zero entries.
// NOTE(review): the remainder of the initializer list is not visible here.
92 SimpleIndexFile::IndexMetadata::IndexMetadata() :
93 magic_number_(kSimpleIndexMagicNumber
),
94 version_(kSimpleVersion
),
95 number_of_entries_(0),
// Constructs metadata for an index holding |number_of_entries| entries with
// the given |cache_size|, stamped with kSimpleIndexMagicNumber and
// kSimpleVersion.
98 SimpleIndexFile::IndexMetadata::IndexMetadata(
99 uint64 number_of_entries
, uint64 cache_size
) :
100 magic_number_(kSimpleIndexMagicNumber
),
101 version_(kSimpleVersion
),
102 number_of_entries_(number_of_entries
),
103 cache_size_(cache_size
) {}
// Appends the metadata fields to |pickle| in the order Deserialize() reads
// them back: magic number (uint64), version (uint32), number of entries
// (uint64), cache size (uint64).
105 void SimpleIndexFile::IndexMetadata::Serialize(Pickle
* pickle
) const {
107 pickle
->WriteUInt64(magic_number_
);
108 pickle
->WriteUInt32(version_
);
109 pickle
->WriteUInt64(number_of_entries_
);
110 pickle
->WriteUInt64(cache_size_
);
// Reads the four metadata fields from |it| in the order Serialize() writes
// them. Returns true only if every read succeeds; because the reads are
// chained with &&, reading stops at the first failure.
113 bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator
* it
) {
115 return it
->ReadUInt64(&magic_number_
) &&
116 it
->ReadUInt32(&version_
) &&
117 it
->ReadUInt64(&number_of_entries_
)&&
118 it
->ReadUInt64(&cache_size_
);
// Returns true when the entry count does not exceed kMaxEntiresInIndex and the
// stored magic number and version equal kSimpleIndexMagicNumber and
// kSimpleVersion.
121 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
122 return number_of_entries_
<= kMaxEntiresInIndex
&&
123 magic_number_
== disk_cache::kSimpleIndexMagicNumber
&&
124 version_
== disk_cache::kSimpleVersion
;
// Stores the two task runners and the cache directory, and precomputes the
// paths of the index file and the temporary index file inside that directory.
127 SimpleIndexFile::SimpleIndexFile(
128 base::SingleThreadTaskRunner
* cache_thread
,
129 base::TaskRunner
* worker_pool
,
130 const base::FilePath
& cache_directory
)
131 : cache_thread_(cache_thread
),
132 worker_pool_(worker_pool
),
133 cache_directory_(cache_directory
),
// The two paths below are built from the cache_directory_ member, not the
// parameter — presumably cache_directory_ is declared before them in the
// header so it is initialized first; verify.
134 index_file_(cache_directory_
.AppendASCII(kIndexFileName
)),
135 temp_index_file_(cache_directory_
.AppendASCII(kTempIndexFileName
)) {
// Destructor; the body is empty.
138 SimpleIndexFile::~SimpleIndexFile() {}
// Starts an asynchronous index load. Binds SyncLoadIndexEntries with
// |cache_last_modified|, the cache directory, the index file path and
// |out_result|, posts it to worker_pool_, and has |callback| run as the reply.
// NOTE(review): |out_result| is bound as a raw pointer; presumably the caller
// keeps it alive until |callback| runs — confirm.
140 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified
,
141 const base::Closure
& callback
,
142 SimpleIndexLoadResult
* out_result
) {
143 base::Closure task
= base::Bind(&SimpleIndexFile::SyncLoadIndexEntries
,
144 cache_last_modified
, cache_directory_
,
145 index_file_
, out_result
);
146 worker_pool_
->PostTaskAndReply(FROM_HERE
, task
, callback
);
// Serializes |entry_set| together with an IndexMetadata built from
// entry_set.size() and cache_size, then posts WriteToDiskInternal to
// cache_thread_. The pickle is handed to the bound task via base::Passed.
// NOTE(review): the visible bind arguments include base::TimeTicks::Now();
// whether |start| is used cannot be determined from the lines visible here.
149 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet
& entry_set
,
151 const base::TimeTicks
& start
,
152 bool app_on_background
) {
153 IndexMetadata
index_metadata(entry_set
.size(), cache_size
);
154 scoped_ptr
<Pickle
> pickle
= Serialize(index_metadata
, entry_set
);
155 cache_thread_
->PostTask(FROM_HERE
, base::Bind(
156 &WriteToDiskInternal
,
159 base::Passed(&pickle
),
160 base::TimeTicks::Now(),
// Posts SimpleSynchronousEntry::DoomEntrySet for |entry_hashes| in
// cache_directory_ and forwards its result to |reply_callback| through
// DoomEntrySetReply. |entry_hashes| is handed over to the bound task.
// NOTE(review): the task runner argument lines are not visible in this view.
164 void SimpleIndexFile::DoomEntrySet(
165 scoped_ptr
<std::vector
<uint64
> > entry_hashes
,
166 const net::CompletionCallback
& reply_callback
) {
167 PostTaskAndReplyWithResult(
170 base::Bind(&SimpleSynchronousEntry::DoomEntrySet
,
171 base::Passed(entry_hashes
.Pass()), cache_directory_
),
172 base::Bind(&DoomEntrySetReply
, reply_callback
));
// Fills |out_result| with the cache index and records UMA histograms about how
// that went.
//
// The index file at |index_file_path| is loaded with SyncLoadFromDisk() when
// IsIndexFileStale() does not report it stale relative to
// |cache_last_modified|. If |out_result|->did_load is still false afterwards,
// the index is rebuilt from |cache_directory| with SyncRestoreFromDisk().
// Histograms cover the index file state, load/restore timings and entry
// counts, and the way the index was initialized.
//
// NOTE(review): several lines (enum headers, else branches, some histogram
// arguments) are not visible in this view; confirm the exact branch structure
// against the full file.
176 void SimpleIndexFile::SyncLoadIndexEntries(
177 base::Time cache_last_modified
,
178 const base::FilePath
& cache_directory
,
179 const base::FilePath
& index_file_path
,
180 SimpleIndexLoadResult
* out_result
) {
181 // TODO(felipeg): probably could load a stale index and use it for something.
182 const SimpleIndex::EntrySet
& entries
= out_result
->entries
;
// Sampled before any load/restore step; used further down to choose the
// initialize method.
184 const bool index_file_exists
= base::PathExists(index_file_path
);
186 // Used in histograms. Please only add new values at the end.
188 INDEX_STATE_CORRUPT
= 0,
189 INDEX_STATE_STALE
= 1,
190 INDEX_STATE_FRESH
= 2,
191 INDEX_STATE_FRESH_CONCURRENT_UPDATES
= 3,
195 // Only load if the index is not stale.
196 if (IsIndexFileStale(cache_last_modified
, index_file_path
)) {
197 index_file_state
= INDEX_STATE_STALE
;
199 index_file_state
= INDEX_STATE_FRESH
;
200 base::Time latest_dir_mtime
;
// Re-check staleness against the directory's current mtime.
201 if (simple_util::GetMTime(cache_directory
, &latest_dir_mtime
) &&
202 IsIndexFileStale(latest_dir_mtime
, index_file_path
)) {
203 // A file operation has updated the directory since we last looked at it
204 // during backend initialization.
205 index_file_state
= INDEX_STATE_FRESH_CONCURRENT_UPDATES
;
208 const base::TimeTicks start
= base::TimeTicks::Now();
209 SyncLoadFromDisk(index_file_path
, out_result
);
210 UMA_HISTOGRAM_TIMES("SimpleCache.IndexLoadTime",
211 base::TimeTicks::Now() - start
);
// Report zero entries when the load did not succeed.
212 UMA_HISTOGRAM_COUNTS("SimpleCache.IndexEntriesLoaded",
213 out_result
->did_load
? entries
.size() : 0);
// A failed load is reported as a corrupt index file.
214 if (!out_result
->did_load
)
215 index_file_state
= INDEX_STATE_CORRUPT
;
217 UMA_HISTOGRAM_ENUMERATION("SimpleCache.IndexFileStateOnLoad",
// Fall back to rebuilding the index from the files in the cache directory.
221 if (!out_result
->did_load
) {
222 const base::TimeTicks start
= base::TimeTicks::Now();
223 SyncRestoreFromDisk(cache_directory
, index_file_path
, out_result
);
224 UMA_HISTOGRAM_MEDIUM_TIMES("SimpleCache.IndexRestoreTime",
225 base::TimeTicks::Now() - start
);
226 UMA_HISTOGRAM_COUNTS("SimpleCache.IndexEntriesRestored",
230 // Used in histograms. Please only add new values at the end.
232 INITIALIZE_METHOD_RECOVERED
= 0,
233 INITIALIZE_METHOD_LOADED
= 1,
234 INITIALIZE_METHOD_NEWCACHE
= 2,
235 INITIALIZE_METHOD_MAX
= 3,
237 int initialize_method
;
// flush_required is set by SyncRestoreFromDisk(), so with an existing index
// file it distinguishes a recovered index from a plainly loaded one.
238 if (index_file_exists
) {
239 if (out_result
->flush_required
)
240 initialize_method
= INITIALIZE_METHOD_RECOVERED
;
242 initialize_method
= INITIALIZE_METHOD_LOADED
;
244 UMA_HISTOGRAM_COUNTS("SimpleCache.IndexCreatedEntryCount",
246 initialize_method
= INITIALIZE_METHOD_NEWCACHE
;
249 UMA_HISTOGRAM_ENUMERATION("SimpleCache.IndexInitializeMethod",
250 initialize_method
, INITIALIZE_METHOD_MAX
);
// Memory-maps |index_filename| and deserializes its contents into
// |out_result|. The file is deleted when it cannot be mapped, and also when
// deserialization leaves |out_result|->did_load false.
// NOTE(review): the statements following the failed-map deletion are not
// visible in this view.
254 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath
& index_filename
,
255 SimpleIndexLoadResult
* out_result
) {
258 base::MemoryMappedFile index_file_map
;
259 if (!index_file_map
.Initialize(index_filename
)) {
260 LOG(WARNING
) << "Could not map Simple Index file.";
261 base::DeleteFile(index_filename
, false);
265 SimpleIndexFile::Deserialize(
266 reinterpret_cast<const char*>(index_file_map
.data()),
267 index_file_map
.length(), out_result
);
// Remove an index file that could not be deserialized.
269 if (!out_result
->did_load
)
270 base::DeleteFile(index_filename
, false);
// Builds the on-disk representation of the index. The returned Pickle is
// created with a header of sizeof(PickleHeader) bytes and contains
// |index_metadata| followed, for each entry in |entries|, by its 64-bit hash
// key and its serialized metadata. The checksum from CalculatePickleCRC() is
// stored in the header's crc field. Ownership of the pickle passes to the
// caller.
274 scoped_ptr
<Pickle
> SimpleIndexFile::Serialize(
275 const SimpleIndexFile::IndexMetadata
& index_metadata
,
276 const SimpleIndex::EntrySet
& entries
) {
277 scoped_ptr
<Pickle
> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader
)));
279 index_metadata
.Serialize(pickle
.get());
280 for (SimpleIndex::EntrySet::const_iterator it
= entries
.begin();
281 it
!= entries
.end(); ++it
) {
// Key first, then the entry's metadata — the order Deserialize() reads.
282 pickle
->WriteUInt64(it
->first
);
283 it
->second
.Serialize(pickle
.get());
// Computed after all writes so the checksum covers the final payload.
285 SimpleIndexFile::PickleHeader
* header_p
=
286 pickle
->headerT
<SimpleIndexFile::PickleHeader
>();
287 header_p
->crc
= CalculatePickleCRC(*pickle
);
288 return pickle
.Pass();
// Parses the |data_len| bytes at |data| as a serialized index and fills
// |out_result|->entries. The visible checks are, in order: the Pickle exposes
// data, the CRC stored in the header equals the one computed by
// CalculatePickleCRC(), the IndexMetadata can be read, and
// CheckIndexMetadata() accepts it. Entries are then read as (hash key,
// EntryMetadata) pairs and inserted until the set holds the number of entries
// announced by the metadata. |out_result|->did_load is set to true at the end.
// NOTE(review): the statements that follow each failure log message are not
// visible in this view; confirm how each failure path exits.
292 void SimpleIndexFile::Deserialize(const char* data
, int data_len
,
293 SimpleIndexLoadResult
* out_result
) {
297 SimpleIndex::EntrySet
* entries
= &out_result
->entries
;
299 Pickle
pickle(data
, data_len
);
300 if (!pickle
.data()) {
301 LOG(WARNING
) << "Corrupt Simple Index File.";
305 PickleIterator
pickle_it(pickle
);
307 SimpleIndexFile::PickleHeader
* header_p
=
308 pickle
.headerT
<SimpleIndexFile::PickleHeader
>();
// Compare the checksum stored in the header with one computed from the data.
309 const uint32 crc_read
= header_p
->crc
;
310 const uint32 crc_calculated
= CalculatePickleCRC(pickle
);
312 if (crc_read
!= crc_calculated
) {
313 LOG(WARNING
) << "Invalid CRC in Simple Index file.";
317 SimpleIndexFile::IndexMetadata index_metadata
;
318 if (!index_metadata
.Deserialize(&pickle_it
)) {
319 LOG(ERROR
) << "Invalid index_metadata on Simple Cache Index.";
323 if (!index_metadata
.CheckIndexMetadata()) {
324 LOG(ERROR
) << "Invalid index_metadata on Simple Cache Index.";
329 // TODO(gavinp): Consider using std::unordered_map.
330 entries
->resize(index_metadata
.GetNumberOfEntries() + kExtraSizeForMerge
);
// Loop on the set's size rather than a counter: it ends once the announced
// number of entries is present.
332 while (entries
->size() < index_metadata
.GetNumberOfEntries()) {
334 EntryMetadata entry_metadata
;
335 if (!pickle_it
.ReadUInt64(&hash_key
) ||
336 !entry_metadata
.Deserialize(&pickle_it
)) {
337 LOG(WARNING
) << "Invalid EntryMetadata in Simple Index file.";
341 SimpleIndex::InsertInEntrySet(hash_key
, entry_metadata
, entries
);
344 out_result
->did_load
= true;
// Rebuilds the index in |out_result|->entries by enumerating the files in
// |cache_directory| (non-recursively), using the file pattern "*_[0-2]".
//
// The existing index file at |index_file_path| is deleted first. For each
// enumerated file the entry hash key is parsed from the base name with its
// two-character "_N" suffix removed. A key not yet in the set is inserted with
// the file's last-used time and size; for a key already present the file size
// is added to the entry's size. On completion did_load and flush_required are
// both set to true.
//
// NOTE(review): some lines (the hash_key declaration, the handling after an
// unparsable name, the #endif, some call arguments) are not visible in this
// view.
348 void SimpleIndexFile::SyncRestoreFromDisk(
349 const base::FilePath
& cache_directory
,
350 const base::FilePath
& index_file_path
,
351 SimpleIndexLoadResult
* out_result
) {
352 LOG(INFO
) << "Simple Cache Index is being restored from disk.";
354 base::DeleteFile(index_file_path
, /* recursive = */ false);
356 SimpleIndex::EntrySet
* entries
= &out_result
->entries
;
358 // TODO(felipeg,gavinp): Fix this once we have a one-file per entry format.
// The "[0-2]" in the pattern below hard-codes three files per entry; this
// compile-time check fails if kSimpleEntryFileCount changes.
359 COMPILE_ASSERT(kSimpleEntryFileCount
== 3,
360 file_pattern_must_match_file_count
);
// Length of the "_N" suffix, excluding the terminating NUL.
362 const int kFileSuffixLength
= sizeof("_0") - 1;
363 const base::FilePath::StringType file_pattern
= FILE_PATH_LITERAL("*_[0-2]");
364 base::FileEnumerator
enumerator(cache_directory
,
365 false /* recursive */,
366 base::FileEnumerator::FILES
,
368 for (base::FilePath file_path
= enumerator
.Next(); !file_path
.empty();
369 file_path
= enumerator
.Next()) {
370 const base::FilePath::StringType base_name
= file_path
.BaseName().value();
371 // Converting to std::string is OK since we never use UTF8 wide chars in our
// Drop the trailing "_N" suffix to obtain the hex hash key string.
373 const std::string
hash_key_string(base_name
.begin(),
374 base_name
.end() - kFileSuffixLength
);
376 if (!simple_util::GetEntryHashKeyFromHexString(
377 hash_key_string
, &hash_key
)) {
378 LOG(WARNING
) << "Invalid Entry Hash Key filename while restoring "
379 << "Simple Index from disk: " << base_name
;
380 // TODO(felipeg): Should we delete the invalid file here ?
384 base::FileEnumerator::FileInfo info
= enumerator
.GetInfo();
385 base::Time last_used_time
;
386 #if defined(OS_POSIX)
387 // For POSIX systems, a last access time is available. However, it's not
388 // guaranteed to be more accurate than mtime. It is no worse though.
389 last_used_time
= base::Time::FromTimeT(info
.stat().st_atime
);
// Fall back to the modification time when no last-used time was obtained.
391 if (last_used_time
.is_null())
392 last_used_time
= info
.GetLastModifiedTime();
394 int64 file_size
= info
.GetSize();
395 SimpleIndex::EntrySet::iterator it
= entries
->find(hash_key
);
// First file seen for this key: create the entry.
396 if (it
== entries
->end()) {
397 SimpleIndex::InsertInEntrySet(
399 EntryMetadata(last_used_time
, file_size
),
402 // Summing up the total size of the entry through all the *_[0-2] files
403 it
->second
.SetEntrySize(it
->second
.GetEntrySize() + file_size
);
407 out_result
->did_load
= true;
409 // When we restore from disk we write the merged index file to disk right
410 // away, this might save us from having to restore again next time.
411 out_result
->flush_required
= true;
// Compares the modification time of the index file at |index_file_path| with
// |cache_last_modified|. The visible return statement yields true when the
// index file's mtime is strictly earlier than |cache_last_modified|.
// NOTE(review): the statement executed when GetMTime() fails is not visible in
// this view — confirm what is returned in that case.
415 bool SimpleIndexFile::IsIndexFileStale(base::Time cache_last_modified
,
416 const base::FilePath
& index_file_path
) {
417 base::Time index_mtime
;
418 if (!simple_util::GetMTime(index_file_path
, &index_mtime
))
420 return index_mtime
< cache_last_modified
;
423 } // namespace disk_cache