1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
9 #include "base/logging.h"
10 #include "base/metrics/histogram.h"
14 // Return |true| if the range is sorted by the given comparator.
15 template <typename CTI
, typename LESS
>
16 bool sorted(CTI beg
, CTI end
, LESS less
) {
17 while ((end
- beg
) > 2) {
25 // Find items matching between |subs| and |adds|, and remove them. To minimize
26 // copies, the inputs are processing in parallel, so |subs| and |adds| should be
27 // compatibly ordered (either by SBAddPrefixLess or SBAddPrefixHashLess).
29 // |predAddSub| provides add < sub, |predSubAdd| provides sub < add, for the
30 // tightest compare appropriate (see calls in SBProcessSubs).
31 template <typename SubsT
, typename AddsT
,
32 typename PredAddSubT
, typename PredSubAddT
>
33 void KnockoutSubs(SubsT
* subs
, AddsT
* adds
,
34 PredAddSubT predAddSub
, PredSubAddT predSubAdd
) {
35 // Keep a pair of output iterators for writing kept items. Due to
36 // deletions, these may lag the main iterators. Using erase() on
37 // individual items would result in O(N^2) copies. Using std::list
38 // would work around that, at double or triple the memory cost.
39 typename
AddsT::iterator add_out
= adds
->begin();
40 typename
SubsT::iterator sub_out
= subs
->begin();
42 // Current location in containers.
43 // TODO(shess): I want these to be const_iterator, but then
44 // std::copy() gets confused. Could snag a const_iterator add_end,
45 // or write an inline std::copy(), but it seems like I'm doing
47 typename
AddsT::iterator add_iter
= adds
->begin();
48 typename
SubsT::iterator sub_iter
= subs
->begin();
50 while (add_iter
!= adds
->end() && sub_iter
!= subs
->end()) {
51 // If |*sub_iter| < |*add_iter|, retain the sub.
52 if (predSubAdd(*sub_iter
, *add_iter
)) {
57 // If |*add_iter| < |*sub_iter|, retain the add.
58 } else if (predAddSub(*add_iter
, *sub_iter
)) {
70 // Erase any leftover gap.
71 adds
->erase(add_out
, add_iter
);
72 subs
->erase(sub_out
, sub_iter
);
75 // Remove deleted items (|chunk_id| in |del_set|) from the container.
76 template <typename ItemsT
>
77 void RemoveDeleted(ItemsT
* items
, const base::hash_set
<int32
>& del_set
) {
80 // Move items from |iter| to |end_iter|, skipping items in |del_set|.
81 typename
ItemsT::iterator end_iter
= items
->begin();
82 for (typename
ItemsT::iterator iter
= end_iter
;
83 iter
!= items
->end(); ++iter
) {
84 if (del_set
.count(iter
->chunk_id
) == 0) {
89 items
->erase(end_iter
, items
->end());
92 // Remove prefixes which are in the same chunk as their fullhash. This was a
93 // mistake in earlier implementations.
94 template <typename HashesT
, typename PrefixesT
>
95 size_t KnockoutPrefixVolunteers(const HashesT
& full_hashes
,
96 PrefixesT
* prefixes
) {
97 typename
PrefixesT::iterator prefixes_process
= prefixes
->begin();
98 typename
PrefixesT::iterator prefixes_out
= prefixes
->begin();
99 typename
HashesT::const_iterator hashes_process
= full_hashes
.begin();
101 size_t skipped_count
= 0;
103 while (hashes_process
!= full_hashes
.end()) {
104 // Scan prefixes forward until an item is not less than the current hash.
105 while (prefixes_process
!= prefixes
->end() &&
106 SBAddPrefixLess(*prefixes_process
, *hashes_process
)) {
107 if (prefixes_process
!= prefixes_out
) {
108 *prefixes_out
= *prefixes_process
;
114 // If the current hash is also not less than the prefix, that implies they
115 // are equal. Skip the prefix.
116 if (prefixes_process
!= prefixes
->end() &&
117 !SBAddPrefixLess(*hashes_process
, *prefixes_process
)) {
125 // If any prefixes were skipped, copy over the tail and erase the excess.
126 if (prefixes_process
!= prefixes_out
) {
127 prefixes_out
= std::copy(prefixes_process
, prefixes
->end(), prefixes_out
);
128 prefixes
->erase(prefixes_out
, prefixes
->end());
131 return skipped_count
;
136 void SBProcessSubs(SBAddPrefixes
* add_prefixes
,
137 SBSubPrefixes
* sub_prefixes
,
138 std::vector
<SBAddFullHash
>* add_full_hashes
,
139 std::vector
<SBSubFullHash
>* sub_full_hashes
,
140 const base::hash_set
<int32
>& add_chunks_deleted
,
141 const base::hash_set
<int32
>& sub_chunks_deleted
) {
142 // It is possible to structure templates and template
143 // specializations such that the following calls work without having
144 // to qualify things. It becomes very arbitrary, though, and less
145 // clear how things are working.
147 // Make sure things are sorted appropriately.
148 DCHECK(sorted(add_prefixes
->begin(), add_prefixes
->end(),
149 SBAddPrefixLess
<SBAddPrefix
,SBAddPrefix
>));
150 DCHECK(sorted(sub_prefixes
->begin(), sub_prefixes
->end(),
151 SBAddPrefixLess
<SBSubPrefix
,SBSubPrefix
>));
152 DCHECK(sorted(add_full_hashes
->begin(), add_full_hashes
->end(),
153 SBAddPrefixHashLess
<SBAddFullHash
,SBAddFullHash
>));
154 DCHECK(sorted(sub_full_hashes
->begin(), sub_full_hashes
->end(),
155 SBAddPrefixHashLess
<SBSubFullHash
,SBSubFullHash
>));
157 // Earlier database code added prefixes when it saw fullhashes. The protocol
158 // should never send a chunk of mixed prefixes and fullhashes, the following
159 // removes any such cases which are seen.
160 // TODO(shess): Remove this code once most databases have been processed.
161 // Chunk churn should clean up anyone left over. This only takes a few ms to
162 // run through my current database, so it doesn't seem worthwhile to do much
164 size_t skipped
= KnockoutPrefixVolunteers(*add_full_hashes
, add_prefixes
);
165 skipped
+= KnockoutPrefixVolunteers(*sub_full_hashes
, sub_prefixes
);
166 UMA_HISTOGRAM_COUNTS("SB2.VolunteerPrefixesRemoved", skipped
);
168 // Factor out the prefix subs.
169 KnockoutSubs(sub_prefixes
, add_prefixes
,
170 SBAddPrefixLess
<SBAddPrefix
,SBSubPrefix
>,
171 SBAddPrefixLess
<SBSubPrefix
,SBAddPrefix
>);
173 // Factor out the full-hash subs.
174 KnockoutSubs(sub_full_hashes
, add_full_hashes
,
175 SBAddPrefixHashLess
<SBAddFullHash
,SBSubFullHash
>,
176 SBAddPrefixHashLess
<SBSubFullHash
,SBAddFullHash
>);
178 // Remove items from the deleted chunks. This is done after other
179 // processing to allow subs to knock out adds (and be removed) even
180 // if the add's chunk is deleted.
181 RemoveDeleted(add_prefixes
, add_chunks_deleted
);
182 RemoveDeleted(sub_prefixes
, sub_chunks_deleted
);
183 RemoveDeleted(add_full_hashes
, add_chunks_deleted
);
184 RemoveDeleted(sub_full_hashes
, sub_chunks_deleted
);