1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/prefix_set.h"
10 #include "base/file_util.h"
11 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
17 // |kMagic| should be reasonably unique, and not match itself across
18 // endianness changes. I generated this value with:
19 // md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9
20 static uint32 kMagic
= 0x864088dd;
22 // Current version the code writes out.
23 static uint32 kVersion
= 0x1;
34 namespace safe_browsing
{
36 // For |std::upper_bound()| to find a prefix w/in a vector of pairs.
38 bool PrefixSet::PrefixLess(const IndexPair
& a
, const IndexPair
& b
) {
39 return a
.first
< b
.first
;
42 PrefixSet::PrefixSet(const std::vector
<SBPrefix
>& sorted_prefixes
) {
43 if (sorted_prefixes
.size()) {
44 // Estimate the resulting vector sizes. There will be strictly
45 // more than |min_runs| entries in |index_|, but there generally
46 // aren't many forced breaks.
47 const size_t min_runs
= sorted_prefixes
.size() / kMaxRun
;
48 index_
.reserve(min_runs
);
49 deltas_
.reserve(sorted_prefixes
.size() - min_runs
);
51 // Lead with the first prefix.
52 SBPrefix prev_prefix
= sorted_prefixes
[0];
53 size_t run_length
= 0;
54 index_
.push_back(std::make_pair(prev_prefix
, deltas_
.size()));
56 for (size_t i
= 1; i
< sorted_prefixes
.size(); ++i
) {
58 if (sorted_prefixes
[i
] == prev_prefix
)
61 // Calculate the delta. |unsigned| is mandatory, because the
62 // sorted_prefixes could be more than INT_MAX apart.
63 DCHECK_GT(sorted_prefixes
[i
], prev_prefix
);
64 const unsigned delta
= sorted_prefixes
[i
] - prev_prefix
;
65 const uint16 delta16
= static_cast<uint16
>(delta
);
67 // New index ref if the delta doesn't fit, or if too many
68 // consecutive deltas have been encoded.
69 if (delta
!= static_cast<unsigned>(delta16
) || run_length
>= kMaxRun
) {
70 index_
.push_back(std::make_pair(sorted_prefixes
[i
], deltas_
.size()));
73 // Continue the run of deltas.
74 deltas_
.push_back(delta16
);
75 DCHECK_EQ(static_cast<unsigned>(deltas_
.back()), delta
);
79 prev_prefix
= sorted_prefixes
[i
];
82 // Send up some memory-usage stats. Bits because fractional bytes
84 const size_t bits_used
= index_
.size() * sizeof(index_
[0]) * CHAR_BIT
+
85 deltas_
.size() * sizeof(deltas_
[0]) * CHAR_BIT
;
86 const size_t unique_prefixes
= index_
.size() + deltas_
.size();
87 static const size_t kMaxBitsPerPrefix
= sizeof(SBPrefix
) * CHAR_BIT
;
88 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetBitsPerPrefix",
89 bits_used
/ unique_prefixes
,
94 PrefixSet::PrefixSet(IndexVector
* index
, std::vector
<uint16
>* deltas
) {
95 DCHECK(index
&& deltas
);
97 deltas_
.swap(*deltas
);
100 PrefixSet::~PrefixSet() {}
102 bool PrefixSet::Exists(SBPrefix prefix
) const {
106 // Find the first position after |prefix| in |index_|.
107 IndexVector::const_iterator iter
=
108 std::upper_bound(index_
.begin(), index_
.end(),
109 IndexPair(prefix
, 0), PrefixLess
);
111 // |prefix| comes before anything that's in the set.
112 if (iter
== index_
.begin())
115 // Capture the upper bound of our target entry's deltas.
116 const size_t bound
= (iter
== index_
.end() ? deltas_
.size() : iter
->second
);
118 // Back up to the entry our target is in.
121 // All prefixes in |index_| are in the set.
122 SBPrefix current
= iter
->first
;
123 if (current
== prefix
)
126 // Scan forward accumulating deltas while a match is possible.
127 for (size_t di
= iter
->second
; di
< bound
&& current
< prefix
; ++di
) {
128 current
+= deltas_
[di
];
131 return current
== prefix
;
134 void PrefixSet::GetPrefixes(std::vector
<SBPrefix
>* prefixes
) const {
135 prefixes
->reserve(index_
.size() + deltas_
.size());
137 for (size_t ii
= 0; ii
< index_
.size(); ++ii
) {
138 // The deltas for this |index_| entry run to the next index entry,
139 // or the end of the deltas.
140 const size_t deltas_end
=
141 (ii
+ 1 < index_
.size()) ? index_
[ii
+ 1].second
: deltas_
.size();
143 SBPrefix current
= index_
[ii
].first
;
144 prefixes
->push_back(current
);
145 for (size_t di
= index_
[ii
].second
; di
< deltas_end
; ++di
) {
146 current
+= deltas_
[di
];
147 prefixes
->push_back(current
);
153 PrefixSet
* PrefixSet::LoadFile(const base::FilePath
& filter_name
) {
155 if (!base::GetFileSize(filter_name
, &size_64
))
157 using base::MD5Digest
;
158 if (size_64
< static_cast<int64
>(sizeof(FileHeader
) + sizeof(MD5Digest
)))
161 file_util::ScopedFILE
file(base::OpenFile(filter_name
, "rb"));
166 size_t read
= fread(&header
, sizeof(header
), 1, file
.get());
170 if (header
.magic
!= kMagic
|| header
.version
!= kVersion
)
174 const size_t index_bytes
= sizeof(index
[0]) * header
.index_size
;
176 // For a time, the second element of the index_ pair was a size_t rather than
177 // a fixed-size value. This structure will be used to check, read and convert
178 // in case a 64-bit size_t was written.
179 std::vector
<std::pair
<SBPrefix
,uint64
> > alt_index
;
180 const size_t alt_index_bytes
= sizeof(alt_index
[0]) * header
.index_size
;
182 std::vector
<uint16
> deltas
;
183 const size_t deltas_bytes
= sizeof(deltas
[0]) * header
.deltas_size
;
185 // Check for bogus sizes before allocating any space.
186 const size_t expected_bytes
=
187 sizeof(header
) + index_bytes
+ deltas_bytes
+ sizeof(MD5Digest
);
188 bool read_alt_index
= false;
189 if (static_cast<int64
>(expected_bytes
) != size_64
) {
190 const size_t alt_expected_bytes
=
191 sizeof(header
) + alt_index_bytes
+ deltas_bytes
+ sizeof(MD5Digest
);
192 if (static_cast<int64
>(alt_expected_bytes
) != size_64
)
195 read_alt_index
= true;
198 // The file looks valid, start building the digest.
199 base::MD5Context context
;
200 base::MD5Init(&context
);
201 base::MD5Update(&context
, base::StringPiece(reinterpret_cast<char*>(&header
),
204 // Read the index vector. Herb Sutter indicates that vectors are
205 // guaranteed to be contiuguous, so reading to where element 0 lives
207 if (read_alt_index
) {
208 alt_index
.resize(header
.index_size
);
209 read
= fread(&(alt_index
[0]), sizeof(alt_index
[0]), alt_index
.size(),
211 if (read
!= alt_index
.size())
213 base::MD5Update(&context
,
214 base::StringPiece(reinterpret_cast<char*>(&(alt_index
[0])),
217 index
.reserve(alt_index
.size());
218 for (size_t i
= 0; i
< alt_index
.size(); ++i
) {
219 const uint32 ofs
= static_cast<uint32
>(alt_index
[i
].second
);
220 if (static_cast<uint64
>(ofs
) != alt_index
[i
].second
)
222 index
.push_back(std::make_pair(alt_index
[i
].first
, ofs
));
224 } else if (header
.index_size
) {
225 index
.resize(header
.index_size
);
226 read
= fread(&(index
[0]), sizeof(index
[0]), index
.size(), file
.get());
227 if (read
!= index
.size())
229 base::MD5Update(&context
,
230 base::StringPiece(reinterpret_cast<char*>(&(index
[0])),
234 // Read vector of deltas.
235 if (header
.deltas_size
) {
236 deltas
.resize(header
.deltas_size
);
237 read
= fread(&(deltas
[0]), sizeof(deltas
[0]), deltas
.size(), file
.get());
238 if (read
!= deltas
.size())
240 base::MD5Update(&context
,
241 base::StringPiece(reinterpret_cast<char*>(&(deltas
[0])),
245 base::MD5Digest calculated_digest
;
246 base::MD5Final(&calculated_digest
, &context
);
248 base::MD5Digest file_digest
;
249 read
= fread(&file_digest
, sizeof(file_digest
), 1, file
.get());
253 if (0 != memcmp(&file_digest
, &calculated_digest
, sizeof(file_digest
)))
256 // Steals contents of |index| and |deltas| via swap().
257 return new PrefixSet(&index
, &deltas
);
260 bool PrefixSet::WriteFile(const base::FilePath
& filter_name
) const {
262 header
.magic
= kMagic
;
263 header
.version
= kVersion
;
264 header
.index_size
= static_cast<uint32
>(index_
.size());
265 header
.deltas_size
= static_cast<uint32
>(deltas_
.size());
267 // Sanity check that the 32-bit values never mess things up.
268 if (static_cast<size_t>(header
.index_size
) != index_
.size() ||
269 static_cast<size_t>(header
.deltas_size
) != deltas_
.size()) {
274 file_util::ScopedFILE
file(base::OpenFile(filter_name
, "wb"));
278 base::MD5Context context
;
279 base::MD5Init(&context
);
281 // TODO(shess): The I/O code in safe_browsing_store_file.cc would
282 // sure be useful about now.
283 size_t written
= fwrite(&header
, sizeof(header
), 1, file
.get());
286 base::MD5Update(&context
, base::StringPiece(reinterpret_cast<char*>(&header
),
289 // As for reads, the standard guarantees the ability to access the
290 // contents of the vector by a pointer to an element.
292 const size_t index_bytes
= sizeof(index_
[0]) * index_
.size();
293 written
= fwrite(&(index_
[0]), sizeof(index_
[0]), index_
.size(),
295 if (written
!= index_
.size())
297 base::MD5Update(&context
,
299 reinterpret_cast<const char*>(&(index_
[0])),
303 if (deltas_
.size()) {
304 const size_t deltas_bytes
= sizeof(deltas_
[0]) * deltas_
.size();
305 written
= fwrite(&(deltas_
[0]), sizeof(deltas_
[0]), deltas_
.size(),
307 if (written
!= deltas_
.size())
309 base::MD5Update(&context
,
311 reinterpret_cast<const char*>(&(deltas_
[0])),
315 base::MD5Digest digest
;
316 base::MD5Final(&digest
, &context
);
317 written
= fwrite(&digest
, sizeof(digest
), 1, file
.get());
321 // TODO(shess): Can this code check that the close was successful?
327 } // namespace safe_browsing