1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
11 #include "base/basictypes.h"
12 #include "base/callback_forward.h"
13 #include "base/containers/hash_tables.h"
14 #include "base/time/time.h"
15 #include "chrome/browser/safe_browsing/prefix_set.h"
16 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
22 // SafeBrowsingStore provides a storage abstraction for the
23 // safe-browsing data used to build the bloom filter. The items
25 // The set of add and sub chunks seen.
26 // List of SBAddPrefix (chunk_id and SBPrefix).
27 // List of SBSubPrefix (chunk_id and the target SBAddPrefix).
28 // List of SBAddFullHash (SBAddPrefix, time received and an SBFullHash).
29 // List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash).
31 // The store is geared towards updating the data, not runtime access
32 // to the data (that is handled by SafeBrowsingDatabase). Updates are
33 // handled similar to a SQL transaction cycle, with the new data being
34 // returned from FinishUpdate() (the COMMIT). Data is not persistent
35 // until FinishUpdate() returns successfully.
37 // FinishUpdate() also handles dropping items whose chunk has been
38 // deleted, and netting out the add/sub lists (when a sub matches an
39 // add, both are dropped).
41 // GetAddChunkId(), GetAddPrefix() and GetFullHash() are exposed so
42 // that these items can be generically compared with each other by
43 // SBAddPrefixLess() and SBAddPrefixHashLess().
49 SBAddPrefix(int32 id
, SBPrefix p
) : chunk_id(id
), prefix(p
) {}
50 SBAddPrefix() : chunk_id(), prefix() {}
52 int32
GetAddChunkId() const { return chunk_id
; }
53 SBPrefix
GetAddPrefix() const { return prefix
; }
// Sequence type used for lists of SBAddPrefix entries; currently a
// std::deque (see the TODO below about std::vector).
56 // TODO(shess): Measure the performance impact of switching this back to
57 // std::vector<> once the v8 file format dominates. Also SBSubPrefixes.
58 typedef std::deque
<SBAddPrefix
> SBAddPrefixes
;
65 SBSubPrefix(int32 id
, int32 add_id
, SBPrefix prefix
)
66 : chunk_id(id
), add_chunk_id(add_id
), add_prefix(prefix
) {}
67 SBSubPrefix() : chunk_id(), add_chunk_id(), add_prefix() {}
69 int32
GetAddChunkId() const { return add_chunk_id
; }
70 SBPrefix
GetAddPrefix() const { return add_prefix
; }
// Sequence type used for lists of SBSubPrefix entries; a std::deque, the
// same container choice as SBAddPrefixes.
73 typedef std::deque
<SBSubPrefix
> SBSubPrefixes
;
75 struct SBAddFullHash
{
77 int32 received
; // TODO(shess): Deprecate and remove.
80 SBAddFullHash(int32 id
, base::Time r
, const SBFullHash
& h
)
82 received(static_cast<int32
>(r
.ToTimeT())),
86 // Provided for ReadAddHashes() implementations, which already have
87 // an int32 for the time.
88 SBAddFullHash(int32 id
, int32 r
, const SBFullHash
& h
)
89 : chunk_id(id
), received(r
), full_hash(h
) {}
91 SBAddFullHash() : chunk_id(), received(), full_hash() {}
93 int32
GetAddChunkId() const { return chunk_id
; }
94 SBPrefix
GetAddPrefix() const { return full_hash
.prefix
; }
97 struct SBSubFullHash
{
100 SBFullHash full_hash
;
102 SBSubFullHash(int32 id
, int32 add_id
, const SBFullHash
& h
)
103 : chunk_id(id
), add_chunk_id(add_id
), full_hash(h
) {}
104 SBSubFullHash() : chunk_id(), add_chunk_id(), full_hash() {}
106 int32
GetAddChunkId() const { return add_chunk_id
; }
107 SBPrefix
GetAddPrefix() const { return full_hash
.prefix
; }
110 // Determine less-than based on prefix and add chunk.
111 template <class T
, class U
>
112 bool SBAddPrefixLess(const T
& a
, const U
& b
) {
113 if (a
.GetAddPrefix() != b
.GetAddPrefix())
114 return a
.GetAddPrefix() < b
.GetAddPrefix();
116 return a
.GetAddChunkId() < b
.GetAddChunkId();
119 // Determine less-than based on prefix, add chunk, and full hash.
120 // Prefix can compare differently than hash due to byte ordering,
121 // so it must take precedence.
122 template <class T
, class U
>
123 bool SBAddPrefixHashLess(const T
& a
, const U
& b
) {
124 if (SBAddPrefixLess(a
, b
))
127 if (SBAddPrefixLess(b
, a
))
130 return memcmp(a
.full_hash
.full_hash
, b
.full_hash
.full_hash
,
131 sizeof(a
.full_hash
.full_hash
)) < 0;
134 // Process the lists for subs which knock out adds. For any item in
135 // |sub_prefixes| which has a match in |add_prefixes|, knock out the
136 // matched items from all vectors. Additionally remove items from
// NOTE(review): the sentence above is cut off in this copy of the file;
// presumably it refers to the chunks listed in |add_chunks_deleted| and
// |sub_chunks_deleted| -- confirm against the implementation.
139 // The inputs must be sorted by SBAddPrefixLess or SBAddPrefixHashLess.
//
// Parameters:
//   add_prefixes, sub_prefixes, add_full_hashes, sub_full_hashes:
//     lists passed by pointer; per the description above, matched items
//     are removed from them.
//   add_chunks_deleted, sub_chunks_deleted:
//     sets of int32 chunk ids, passed by const reference.
140 void SBProcessSubs(SBAddPrefixes
* add_prefixes
,
141 SBSubPrefixes
* sub_prefixes
,
142 std::vector
<SBAddFullHash
>* add_full_hashes
,
143 std::vector
<SBSubFullHash
>* sub_full_hashes
,
144 const base::hash_set
<int32
>& add_chunks_deleted
,
145 const base::hash_set
<int32
>& sub_chunks_deleted
);
147 // Abstract interface for storing data.
148 class SafeBrowsingStore
{
150 SafeBrowsingStore() {}
151 virtual ~SafeBrowsingStore() {}
153 // Sets up the information for later use, but does not necessarily
154 // check whether the underlying file exists, or is valid. If
155 // |corruption_callback| is non-NULL it will be called if corruption
156 // is detected, which could happen as part of any call other than
157 // Delete(). The appropriate action is to use Delete() to clear the
159 virtual void Init(const base::FilePath
& filename
,
160 const base::Closure
& corruption_callback
) = 0;
162 // Deletes the files which back the store, returning true if
164 virtual bool Delete() = 0;
166 // Get all Add prefixes out from the store.
167 virtual bool GetAddPrefixes(SBAddPrefixes
* add_prefixes
) = 0;
169 // Get all add full-length hashes.
170 virtual bool GetAddFullHashes(
171 std::vector
<SBAddFullHash
>* add_full_hashes
) = 0;
173 // Start an update. None of the following methods should be called
174 // unless this returns true. If this returns true, the update
175 // should be terminated by FinishUpdate() or CancelUpdate().
176 virtual bool BeginUpdate() = 0;
178 // Start a chunk of data. None of the methods through FinishChunk()
179 // should be called unless this returns true.
180 // TODO(shess): Would it make sense for this to accept |chunk_id|?
181 // Possibly not, because of possible confusion between sub_chunk_id
183 virtual bool BeginChunk() = 0;
185 virtual bool WriteAddPrefix(int32 chunk_id
, SBPrefix prefix
) = 0;
186 virtual bool WriteAddHash(int32 chunk_id
,
187 base::Time receive_time
,
188 const SBFullHash
& full_hash
) = 0;
189 virtual bool WriteSubPrefix(int32 chunk_id
,
190 int32 add_chunk_id
, SBPrefix prefix
) = 0;
191 virtual bool WriteSubHash(int32 chunk_id
, int32 add_chunk_id
,
192 const SBFullHash
& full_hash
) = 0;
194 // Collect the chunk data and preferably store it on disk to
195 // release memory. Should not modify the data in-place.
196 virtual bool FinishChunk() = 0;
198 // Track the chunks which have been seen.
199 virtual void SetAddChunk(int32 chunk_id
) = 0;
200 virtual bool CheckAddChunk(int32 chunk_id
) = 0;
201 virtual void GetAddChunks(std::vector
<int32
>* out
) = 0;
202 virtual void SetSubChunk(int32 chunk_id
) = 0;
203 virtual bool CheckSubChunk(int32 chunk_id
) = 0;
204 virtual void GetSubChunks(std::vector
<int32
>* out
) = 0;
206 // Delete the indicated chunk_id. The chunk will continue to be
207 // visible until the end of the transaction.
208 virtual void DeleteAddChunk(int32 chunk_id
) = 0;
209 virtual void DeleteSubChunk(int32 chunk_id
) = 0;
211 // May be called during update to verify that the storage is valid.
212 // Return true if the store seems valid. If corruption is detected,
213 // calls the corruption callback and returns false.
214 // NOTE(shess): When storage was SQLite, there was no guarantee that
215 // a structurally sound database actually contained valid data,
216 // whereas SafeBrowsingStoreFile checksums the data. For now, this
217 // distinction doesn't matter.
218 virtual bool CheckValidity() = 0;
220 // Pass the collected chunks through SBProcessSubs() and commit to
221 // permanent storage. The resulting add prefixes and hashes will be
222 // stored in |builder| and |add_full_hashes_result|.
223 virtual bool FinishUpdate(
224 safe_browsing::PrefixSetBuilder
* builder
,
225 std::vector
<SBAddFullHash
>* add_full_hashes_result
) = 0;
227 // Cancel the update in process and remove any temporary disk
228 // storage, leaving the original data unmodified.
229 virtual bool CancelUpdate() = 0;
232 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore
);
235 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_