Disable view source for Developer Tools.
[chromium-blink-merge.git] / chrome / browser / safe_browsing / safe_browsing_store_file.h
blob905843e642a44fb65abff1509f52ebb7581ce16b
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_
6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_
8 #include <set>
9 #include <vector>
11 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
13 #include "base/callback.h"
14 #include "base/file_util.h"
16 // Implement SafeBrowsingStore in terms of a flat file. The file
17 // format is pretty literal:
19 // int32 magic; // magic number "validating" file
20 // int32 version; // format version
22 // // Counts for the various data which follows the header.
23 // uint32 add_chunk_count; // Chunks seen, including empties.
24 // uint32 sub_chunk_count; // Ditto.
25 // uint32 add_prefix_count;
26 // uint32 sub_prefix_count;
27 // uint32 add_hash_count;
28 // uint32 sub_hash_count;
30 // array[add_chunk_count] {
31 // int32 chunk_id;
32 // }
33 // array[sub_chunk_count] {
34 // int32 chunk_id;
35 // }
36 // array[add_prefix_count] {
37 // int32 chunk_id;
38 // int32 prefix;
39 // }
40 // array[sub_prefix_count] {
41 // int32 chunk_id;
42 // int32 add_chunk_id;
43 // int32 add_prefix;
44 // }
45 // array[add_hash_count] {
46 // int32 chunk_id;
47 // int32 received_time; // From base::Time::ToTimeT().
48 // char[32] full_hash;
// }
49 // array[sub_hash_count] {
50 // int32 chunk_id;
51 // int32 add_chunk_id;
52 // char[32] add_full_hash;
53 // }
54 // MD5Digest checksum; // Checksum over preceding data.
56 // During the course of an update, uncommitted data is stored in a
57 // temporary file (which is later re-used to commit). This is an
58 // array of chunks, with the count kept in memory until the end of the
59 // transaction. The format of this file is like the main file, with
60 // the list of chunks seen omitted, as that data is tracked in-memory:
62 // array[] {
63 // uint32 add_prefix_count;
64 // uint32 sub_prefix_count;
65 // uint32 add_hash_count;
66 // uint32 sub_hash_count;
67 // array[add_prefix_count] {
68 // int32 chunk_id;
69 // int32 prefix;
70 // }
71 // array[sub_prefix_count] {
72 // int32 chunk_id;
73 // int32 add_chunk_id;
74 // int32 add_prefix;
75 // }
76 // array[add_hash_count] {
77 // int32 chunk_id;
78 // int32 received_time; // From base::Time::ToTimeT().
79 // char[32] full_hash;
80 // }
81 // array[sub_hash_count] {
82 // int32 chunk_id;
83 // int32 add_chunk_id;
84 // char[32] add_full_hash;
85 // }
86 // }
88 // The overall transaction works like this:
89 // - Open the original file to get the chunks-seen data.
90 // - Open a temp file for storing new chunk info.
91 // - Write new chunks to the temp file.
92 // - When the transaction is finished:
93 // - Read the rest of the original file's data into buffers.
94 // - Rewind the temp file and merge the new data into buffers.
95 // - Process buffers for deletions and apply subs.
96 // - Rewind and write the buffers out to temp file.
97 // - Delete original file.
98 // - Rename temp file to original filename.
100 // TODO(shess): By using a checksum, this code can avoid doing an
101 // fsync(), at the possible cost of more frequently retrieving the
102 // full dataset. Measure how often this occurs, and if it occurs too
103 // often, consider retaining the last known-good file for recovery
104 // purposes, rather than deleting it.
106 class SafeBrowsingStoreFile : public SafeBrowsingStore {
107 public:
108 SafeBrowsingStoreFile();
109 virtual ~SafeBrowsingStoreFile();
// Takes the path of the main database file and a closure to run on
// corruption. NOTE(review): presumably these are kept in |filename_| and
// |corruption_callback_|; the definition is not in this header - confirm.
111 virtual void Init(const base::FilePath& filename,
112 const base::Closure& corruption_callback) OVERRIDE;
114 // Delete any on-disk files, including the permanent storage.
115 virtual bool Delete() OVERRIDE;
117 // Get all add hash prefixes and full-length hashes, respectively, from
118 // the store.
119 virtual bool GetAddPrefixes(SBAddPrefixes* add_prefixes) OVERRIDE;
120 virtual bool GetAddFullHashes(
121 std::vector<SBAddFullHash>* add_full_hashes) OVERRIDE;
// Per-chunk write interface. Per the member comments further down,
// chunk data is collected in |add_prefixes_|, |sub_prefixes_|,
// |add_hashes_| and |sub_hashes_| between BeginChunk() and
// FinishChunk().
123 virtual bool BeginChunk() OVERRIDE;
125 virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) OVERRIDE;
126 virtual bool WriteAddHash(int32 chunk_id,
127 base::Time receive_time,
128 const SBFullHash& full_hash) OVERRIDE;
129 virtual bool WriteSubPrefix(int32 chunk_id,
130 int32 add_chunk_id, SBPrefix prefix) OVERRIDE;
131 virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id,
132 const SBFullHash& full_hash) OVERRIDE;
133 virtual bool FinishChunk() OVERRIDE;
// Update (transaction) interface. The overall sequence of steps is
// described in the comment at the top of this file.
135 virtual bool BeginUpdate() OVERRIDE;
136 // Store updates with pending add full hashes in file store and
137 // return |add_prefixes_result| and |add_full_hashes_result|.
138 virtual bool FinishUpdate(
139 const std::vector<SBAddFullHash>& pending_adds,
140 SBAddPrefixes* add_prefixes_result,
141 std::vector<SBAddFullHash>* add_full_hashes_result) OVERRIDE;
142 virtual bool CancelUpdate() OVERRIDE;
// Chunks-seen bookkeeping for add and sub chunks.
// NOTE(review): presumably backed by |add_chunks_cache_| and
// |sub_chunks_cache_| declared below - confirm against the .cc file.
144 virtual void SetAddChunk(int32 chunk_id) OVERRIDE;
145 virtual bool CheckAddChunk(int32 chunk_id) OVERRIDE;
146 virtual void GetAddChunks(std::vector<int32>* out) OVERRIDE;
147 virtual void SetSubChunk(int32 chunk_id) OVERRIDE;
148 virtual bool CheckSubChunk(int32 chunk_id) OVERRIDE;
149 virtual void GetSubChunks(std::vector<int32>* out) OVERRIDE;
// Chunk deletion requests. NOTE(review): presumably recorded in
// |add_del_cache_| / |sub_del_cache_|, which the member comments say
// are applied on FinishUpdate() - confirm.
151 virtual void DeleteAddChunk(int32 chunk_id) OVERRIDE;
152 virtual void DeleteSubChunk(int32 chunk_id) OVERRIDE;
154 // Verify |file_|'s checksum, calling the corruption callback if it
155 // does not check out. Empty input is considered valid.
156 virtual bool CheckValidity() OVERRIDE;
158 // Returns the name of the temporary file used to buffer data for
159 // |filename|. Exported for unit tests.
// The result is |filename|'s value with the literal suffix "_new"
// appended.
160 static const base::FilePath TemporaryFileForFilename(
161 const base::FilePath& filename) {
162 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new"));
165 // Delete any on-disk files, including the permanent storage.
166 static bool DeleteStore(const base::FilePath& basename);
168 private:
169 // Update store file with pending full hashes.
// Takes the same arguments as FinishUpdate().
170 virtual bool DoUpdate(const std::vector<SBAddFullHash>& pending_adds,
171 SBAddPrefixes* add_prefixes_result,
172 std::vector<SBAddFullHash>* add_full_hashes_result);
174 // Enumerate different format-change events for histogramming
175 // purposes. DO NOT CHANGE THE ORDERING OF THESE VALUES.
176 // TODO(shess): Remove this once the format change is complete.
177 enum FormatEventType {
178 // Corruption detected, broken down by file format.
179 FORMAT_EVENT_FILE_CORRUPT,
180 FORMAT_EVENT_SQLITE_CORRUPT, // Obsolete
182 // The type of format found in the file. The expected case (new
183 // file format) is intentionally not covered.
184 FORMAT_EVENT_FOUND_SQLITE,
185 FORMAT_EVENT_FOUND_UNKNOWN,
187 // The number of SQLite-format files deleted should be the same as
188 // FORMAT_EVENT_FOUND_SQLITE. It can differ if the delete fails,
189 // or if a failure prevents the update from succeeding.
190 FORMAT_EVENT_SQLITE_DELETED, // Obsolete
191 FORMAT_EVENT_SQLITE_DELETE_FAILED, // Obsolete
193 // Found and deleted (or failed to delete) the ancient "Safe
194 // Browsing" file.
195 FORMAT_EVENT_DELETED_ORIGINAL,
196 FORMAT_EVENT_DELETED_ORIGINAL_FAILED,
198 // The checksum did not check out in CheckValidity() or in
199 // FinishUpdate(). This most likely indicates that the machine
200 // crashed before the file was fully sync'ed to disk.
201 FORMAT_EVENT_VALIDITY_CHECKSUM_FAILURE,
202 FORMAT_EVENT_UPDATE_CHECKSUM_FAILURE,
204 // Memory space for histograms is determined by the max. ALWAYS
205 // ADD NEW VALUES BEFORE THIS ONE.
206 FORMAT_EVENT_MAX
209 // Helper to record an event related to format conversion from
210 // SQLite to file.
211 static void RecordFormatEvent(FormatEventType event_type);
213 // Some very lucky users have an original-format file still in their
214 // profile. Check for it and delete, recording a histogram for the
215 // result (no histogram for not-found). Logically this
216 // would make more sense at the SafeBrowsingDatabase level, but
217 // practically speaking that code doesn't touch files directly.
218 static void CheckForOriginalAndDelete(const base::FilePath& filename);
220 // Close all files and clear all buffers.
221 bool Close();
223 // Calls |corruption_callback_| if non-NULL, always returns false as
224 // a convenience to the caller.
225 bool OnCorruptDatabase();
227 // Helper for creating a corruption callback for |old_store_|.
228 // TODO(shess): Remove after migration.
// NOTE(review): no |old_store_| member is declared in this class, so
// the reference above looks stale - confirm and update or remove.
229 void HandleCorruptDatabase();
231 // Clear temporary buffers used to accumulate chunk data.
// Always returns true.
232 bool ClearChunkBuffers() {
233 // NOTE: .clear() doesn't release memory.
// Instead, each member is swapped with an empty temporary; the
// temporary takes over the old storage and is destroyed at the end
// of the statement.
234 // TODO(shess): Figure out if this is overkill. Some amount of
235 // pre-reserved space is probably reasonable between each chunk
236 // collected.
237 SBAddPrefixes().swap(add_prefixes_);
238 SBSubPrefixes().swap(sub_prefixes_);
239 std::vector<SBAddFullHash>().swap(add_hashes_);
240 std::vector<SBSubFullHash>().swap(sub_hashes_);
241 return true;
244 // Clear all buffers used during update.
// Clears the per-chunk buffers, resets |chunks_written_| to zero, and
// empties the chunks-seen and deleted-chunk caches using the same
// swap-with-empty-temporary approach as ClearChunkBuffers().
245 void ClearUpdateBuffers() {
246 ClearChunkBuffers();
247 chunks_written_ = 0;
248 std::set<int32>().swap(add_chunks_cache_);
249 std::set<int32>().swap(sub_chunks_cache_);
250 base::hash_set<int32>().swap(add_del_cache_);
251 base::hash_set<int32>().swap(sub_del_cache_);
254 // Buffers for collecting data between BeginChunk() and
255 // FinishChunk().
256 SBAddPrefixes add_prefixes_;
257 SBSubPrefixes sub_prefixes_;
258 std::vector<SBAddFullHash> add_hashes_;
259 std::vector<SBSubFullHash> sub_hashes_;
261 // Count of chunks collected in |new_file_|.
262 int chunks_written_;
264 // Name of the main database file.
265 base::FilePath filename_;
267 // Handles to the main and scratch files. |empty_| is true if the
268 // main file didn't exist when the update was started.
269 file_util::ScopedFILE file_;
270 file_util::ScopedFILE new_file_;
271 bool empty_;
273 // Cache of chunks which have been seen. Loaded from the database
274 // on BeginUpdate() so that it can be queried during the
275 // transaction.
276 std::set<int32> add_chunks_cache_;
277 std::set<int32> sub_chunks_cache_;
279 // Cache the set of deleted chunks during a transaction, applied on
280 // FinishUpdate().
281 // TODO(shess): If the set is small enough, hash_set<> might be
282 // slower than plain set<>.
283 base::hash_set<int32> add_del_cache_;
284 base::hash_set<int32> sub_del_cache_;
// Closure run by OnCorruptDatabase() when it is non-NULL.
286 base::Closure corruption_callback_;
288 // Tracks whether corruption has already been seen in the current
289 // update, so that only one instance is recorded in the stats.
290 // TODO(shess): Remove with format-migration support.
291 bool corruption_seen_;
// Declared in the private section; per the macro's name it disallows
// copy construction and assignment of this class.
293 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStoreFile);
296 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_