1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
9 #include "base/logging.h"
13 // Find items matching between |subs| and |adds|, and remove them,
14 // recording the item from |adds| in |adds_removed|. To minimize
15 // copies, the inputs are processing in parallel, so |subs| and |adds|
16 // should be compatibly ordered (either by SBAddPrefixLess or
17 // SBAddPrefixHashLess).
19 // |predAddSub| provides add < sub, |predSubAdd| provides sub < add,
20 // for the tightest compare appropriate (see calls in SBProcessSubs).
21 template <typename SubsT
, typename AddsT
,
22 typename PredAddSubT
, typename PredSubAddT
>
23 void KnockoutSubs(SubsT
* subs
, AddsT
* adds
,
24 PredAddSubT predAddSub
, PredSubAddT predSubAdd
,
25 AddsT
* adds_removed
) {
26 // Keep a pair of output iterators for writing kept items. Due to
27 // deletions, these may lag the main iterators. Using erase() on
28 // individual items would result in O(N^2) copies. Using std::list
29 // would work around that, at double or triple the memory cost.
30 typename
AddsT::iterator add_out
= adds
->begin();
31 typename
SubsT::iterator sub_out
= subs
->begin();
33 // Current location in containers.
34 // TODO(shess): I want these to be const_iterator, but then
35 // std::copy() gets confused. Could snag a const_iterator add_end,
36 // or write an inline std::copy(), but it seems like I'm doing
38 typename
AddsT::iterator add_iter
= adds
->begin();
39 typename
SubsT::iterator sub_iter
= subs
->begin();
41 while (add_iter
!= adds
->end() && sub_iter
!= subs
->end()) {
42 // If |*sub_iter| < |*add_iter|, retain the sub.
43 if (predSubAdd(*sub_iter
, *add_iter
)) {
48 // If |*add_iter| < |*sub_iter|, retain the add.
49 } else if (predAddSub(*add_iter
, *sub_iter
)) {
54 // Record equal items and drop them.
56 adds_removed
->push_back(*add_iter
);
62 // Erase any leftover gap.
63 adds
->erase(add_out
, add_iter
);
64 subs
->erase(sub_out
, sub_iter
);
67 // Remove items in |removes| from |full_hashes|. |full_hashes| and
68 // |removes| should be ordered by SBAddPrefix component.
69 template <typename HashesT
, typename AddsT
>
70 void RemoveMatchingPrefixes(const AddsT
& removes
, HashesT
* full_hashes
) {
71 // This is basically an inline of std::set_difference().
72 // Unfortunately, that algorithm requires that the two iterator
73 // pairs use the same value types.
75 // Where to store kept items.
76 typename
HashesT::iterator out
= full_hashes
->begin();
78 typename
HashesT::iterator hash_iter
= full_hashes
->begin();
79 typename
AddsT::const_iterator remove_iter
= removes
.begin();
81 while (hash_iter
!= full_hashes
->end() && remove_iter
!= removes
.end()) {
82 // Keep items less than |*remove_iter|.
83 if (SBAddPrefixLess(*hash_iter
, *remove_iter
)) {
88 // No hit for |*remove_iter|, bump it forward.
89 } else if (SBAddPrefixLess(*remove_iter
, *hash_iter
)) {
92 // Drop equal items, there may be multiple hits.
96 } while (hash_iter
!= full_hashes
->end() &&
97 !SBAddPrefixLess(*remove_iter
, *hash_iter
));
102 // Erase any leftover gap.
103 full_hashes
->erase(out
, hash_iter
);
106 // Remove deleted items (|chunk_id| in |del_set|) from the container.
107 template <typename ItemsT
>
108 void RemoveDeleted(ItemsT
* items
, const base::hash_set
<int32
>& del_set
) {
111 // Move items from |iter| to |end_iter|, skipping items in |del_set|.
112 typename
ItemsT::iterator end_iter
= items
->begin();
113 for (typename
ItemsT::iterator iter
= end_iter
;
114 iter
!= items
->end(); ++iter
) {
115 if (del_set
.count(iter
->chunk_id
) == 0) {
120 items
->erase(end_iter
, items
->end());
125 void SBProcessSubs(SBAddPrefixes
* add_prefixes
,
126 SBSubPrefixes
* sub_prefixes
,
127 std::vector
<SBAddFullHash
>* add_full_hashes
,
128 std::vector
<SBSubFullHash
>* sub_full_hashes
,
129 const base::hash_set
<int32
>& add_chunks_deleted
,
130 const base::hash_set
<int32
>& sub_chunks_deleted
) {
131 // It is possible to structure templates and template
132 // specializations such that the following calls work without having
133 // to qualify things. It becomes very arbitrary, though, and less
134 // clear how things are working.
136 // Sort the inputs by the SBAddPrefix bits.
137 std::sort(add_prefixes
->begin(), add_prefixes
->end(),
138 SBAddPrefixLess
<SBAddPrefix
,SBAddPrefix
>);
139 std::sort(sub_prefixes
->begin(), sub_prefixes
->end(),
140 SBAddPrefixLess
<SBSubPrefix
,SBSubPrefix
>);
141 std::sort(add_full_hashes
->begin(), add_full_hashes
->end(),
142 SBAddPrefixHashLess
<SBAddFullHash
,SBAddFullHash
>);
143 std::sort(sub_full_hashes
->begin(), sub_full_hashes
->end(),
144 SBAddPrefixHashLess
<SBSubFullHash
,SBSubFullHash
>);
146 // Factor out the prefix subs.
147 SBAddPrefixes removed_adds
;
148 KnockoutSubs(sub_prefixes
, add_prefixes
,
149 SBAddPrefixLess
<SBAddPrefix
,SBSubPrefix
>,
150 SBAddPrefixLess
<SBSubPrefix
,SBAddPrefix
>,
153 // Remove the full-hashes corrosponding to the adds which
154 // KnockoutSubs() removed. Processing these w/in KnockoutSubs()
155 // would make the code more complicated, and they are very small
156 // relative to the prefix lists so the gain would be modest.
157 RemoveMatchingPrefixes(removed_adds
, add_full_hashes
);
158 RemoveMatchingPrefixes(removed_adds
, sub_full_hashes
);
160 // Factor out the full-hash subs.
161 std::vector
<SBAddFullHash
> removed_full_adds
;
162 KnockoutSubs(sub_full_hashes
, add_full_hashes
,
163 SBAddPrefixHashLess
<SBAddFullHash
,SBSubFullHash
>,
164 SBAddPrefixHashLess
<SBSubFullHash
,SBAddFullHash
>,
167 // Remove items from the deleted chunks. This is done after other
168 // processing to allow subs to knock out adds (and be removed) even
169 // if the add's chunk is deleted.
170 RemoveDeleted(add_prefixes
, add_chunks_deleted
);
171 RemoveDeleted(sub_prefixes
, sub_chunks_deleted
);
172 RemoveDeleted(add_full_hashes
, add_chunks_deleted
);
173 RemoveDeleted(sub_full_hashes
, sub_chunks_deleted
);