Disable view source for Developer Tools.
[chromium-blink-merge.git] / chrome / browser / safe_browsing / prefix_set.cc
blob4a1ff73685fc8557a3127d1a3b4edf05426050b8
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/prefix_set.h"
7 #include <algorithm>
8 #include <math.h>
10 #include "base/file_util.h"
11 #include "base/logging.h"
12 #include "base/md5.h"
13 #include "base/metrics/histogram.h"
15 namespace {
17 // |kMagic| should be reasonably unique, and not match itself across
18 // endianness changes. I generated this value with:
19 // md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9
20 static uint32 kMagic = 0x864088dd;
22 // Current version the code writes out.
23 static uint32 kVersion = 0x1;
25 typedef struct {
26 uint32 magic;
27 uint32 version;
28 uint32 index_size;
29 uint32 deltas_size;
30 } FileHeader;
32 // For |std::upper_bound()| to find a prefix w/in a vector of pairs.
33 bool PrefixLess(const std::pair<SBPrefix,size_t>& a,
34 const std::pair<SBPrefix,size_t>& b) {
35 return a.first < b.first;
38 } // namespace
40 namespace safe_browsing {
42 PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) {
43 if (sorted_prefixes.size()) {
44 // Estimate the resulting vector sizes. There will be strictly
45 // more than |min_runs| entries in |index_|, but there generally
46 // aren't many forced breaks.
47 const size_t min_runs = sorted_prefixes.size() / kMaxRun;
48 index_.reserve(min_runs);
49 deltas_.reserve(sorted_prefixes.size() - min_runs);
51 // Lead with the first prefix.
52 SBPrefix prev_prefix = sorted_prefixes[0];
53 size_t run_length = 0;
54 index_.push_back(std::make_pair(prev_prefix, deltas_.size()));
56 for (size_t i = 1; i < sorted_prefixes.size(); ++i) {
57 // Skip duplicates.
58 if (sorted_prefixes[i] == prev_prefix)
59 continue;
61 // Calculate the delta. |unsigned| is mandatory, because the
62 // sorted_prefixes could be more than INT_MAX apart.
63 DCHECK_GT(sorted_prefixes[i], prev_prefix);
64 const unsigned delta = sorted_prefixes[i] - prev_prefix;
65 const uint16 delta16 = static_cast<uint16>(delta);
67 // New index ref if the delta doesn't fit, or if too many
68 // consecutive deltas have been encoded.
69 if (delta != static_cast<unsigned>(delta16) || run_length >= kMaxRun) {
70 index_.push_back(std::make_pair(sorted_prefixes[i], deltas_.size()));
71 run_length = 0;
72 } else {
73 // Continue the run of deltas.
74 deltas_.push_back(delta16);
75 DCHECK_EQ(static_cast<unsigned>(deltas_.back()), delta);
76 ++run_length;
79 prev_prefix = sorted_prefixes[i];
82 // Send up some memory-usage stats. Bits because fractional bytes
83 // are weird.
84 const size_t bits_used = index_.size() * sizeof(index_[0]) * CHAR_BIT +
85 deltas_.size() * sizeof(deltas_[0]) * CHAR_BIT;
86 const size_t unique_prefixes = index_.size() + deltas_.size();
87 static const size_t kMaxBitsPerPrefix = sizeof(SBPrefix) * CHAR_BIT;
88 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetBitsPerPrefix",
89 bits_used / unique_prefixes,
90 kMaxBitsPerPrefix);
94 PrefixSet::PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index,
95 std::vector<uint16> *deltas) {
96 DCHECK(index && deltas);
97 index_.swap(*index);
98 deltas_.swap(*deltas);
101 PrefixSet::~PrefixSet() {}
103 bool PrefixSet::Exists(SBPrefix prefix) const {
104 if (index_.empty())
105 return false;
107 // Find the first position after |prefix| in |index_|.
108 std::vector<std::pair<SBPrefix,size_t> >::const_iterator
109 iter = std::upper_bound(index_.begin(), index_.end(),
110 std::pair<SBPrefix,size_t>(prefix, 0),
111 PrefixLess);
113 // |prefix| comes before anything that's in the set.
114 if (iter == index_.begin())
115 return false;
117 // Capture the upper bound of our target entry's deltas.
118 const size_t bound = (iter == index_.end() ? deltas_.size() : iter->second);
120 // Back up to the entry our target is in.
121 --iter;
123 // All prefixes in |index_| are in the set.
124 SBPrefix current = iter->first;
125 if (current == prefix)
126 return true;
128 // Scan forward accumulating deltas while a match is possible.
129 for (size_t di = iter->second; di < bound && current < prefix; ++di) {
130 current += deltas_[di];
133 return current == prefix;
136 void PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) const {
137 prefixes->reserve(index_.size() + deltas_.size());
139 for (size_t ii = 0; ii < index_.size(); ++ii) {
140 // The deltas for this |index_| entry run to the next index entry,
141 // or the end of the deltas.
142 const size_t deltas_end =
143 (ii + 1 < index_.size()) ? index_[ii + 1].second : deltas_.size();
145 SBPrefix current = index_[ii].first;
146 prefixes->push_back(current);
147 for (size_t di = index_[ii].second; di < deltas_end; ++di) {
148 current += deltas_[di];
149 prefixes->push_back(current);
154 // static
155 PrefixSet* PrefixSet::LoadFile(const base::FilePath& filter_name) {
156 int64 size_64;
157 if (!base::GetFileSize(filter_name, &size_64))
158 return NULL;
159 using base::MD5Digest;
160 if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest)))
161 return NULL;
163 file_util::ScopedFILE file(base::OpenFile(filter_name, "rb"));
164 if (!file.get())
165 return NULL;
167 FileHeader header;
168 size_t read = fread(&header, sizeof(header), 1, file.get());
169 if (read != 1)
170 return NULL;
172 if (header.magic != kMagic || header.version != kVersion)
173 return NULL;
175 std::vector<std::pair<SBPrefix,size_t> > index;
176 const size_t index_bytes = sizeof(index[0]) * header.index_size;
178 std::vector<uint16> deltas;
179 const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size;
181 // Check for bogus sizes before allocating any space.
182 const size_t expected_bytes =
183 sizeof(header) + index_bytes + deltas_bytes + sizeof(MD5Digest);
184 if (static_cast<int64>(expected_bytes) != size_64)
185 return NULL;
187 // The file looks valid, start building the digest.
188 base::MD5Context context;
189 base::MD5Init(&context);
190 base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header),
191 sizeof(header)));
193 // Read the index vector. Herb Sutter indicates that vectors are
194 // guaranteed to be contiuguous, so reading to where element 0 lives
195 // is valid.
196 if (header.index_size) {
197 index.resize(header.index_size);
198 read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get());
199 if (read != index.size())
200 return NULL;
201 base::MD5Update(&context,
202 base::StringPiece(reinterpret_cast<char*>(&(index[0])),
203 index_bytes));
206 // Read vector of deltas.
207 if (header.deltas_size) {
208 deltas.resize(header.deltas_size);
209 read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get());
210 if (read != deltas.size())
211 return NULL;
212 base::MD5Update(&context,
213 base::StringPiece(reinterpret_cast<char*>(&(deltas[0])),
214 deltas_bytes));
217 base::MD5Digest calculated_digest;
218 base::MD5Final(&calculated_digest, &context);
220 base::MD5Digest file_digest;
221 read = fread(&file_digest, sizeof(file_digest), 1, file.get());
222 if (read != 1)
223 return NULL;
225 if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest)))
226 return NULL;
228 // Steals contents of |index| and |deltas| via swap().
229 return new PrefixSet(&index, &deltas);
232 bool PrefixSet::WriteFile(const base::FilePath& filter_name) const {
233 FileHeader header;
234 header.magic = kMagic;
235 header.version = kVersion;
236 header.index_size = static_cast<uint32>(index_.size());
237 header.deltas_size = static_cast<uint32>(deltas_.size());
239 // Sanity check that the 32-bit values never mess things up.
240 if (static_cast<size_t>(header.index_size) != index_.size() ||
241 static_cast<size_t>(header.deltas_size) != deltas_.size()) {
242 NOTREACHED();
243 return false;
246 file_util::ScopedFILE file(base::OpenFile(filter_name, "wb"));
247 if (!file.get())
248 return false;
250 base::MD5Context context;
251 base::MD5Init(&context);
253 // TODO(shess): The I/O code in safe_browsing_store_file.cc would
254 // sure be useful about now.
255 size_t written = fwrite(&header, sizeof(header), 1, file.get());
256 if (written != 1)
257 return false;
258 base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header),
259 sizeof(header)));
261 // As for reads, the standard guarantees the ability to access the
262 // contents of the vector by a pointer to an element.
263 if (index_.size()) {
264 const size_t index_bytes = sizeof(index_[0]) * index_.size();
265 written = fwrite(&(index_[0]), sizeof(index_[0]), index_.size(),
266 file.get());
267 if (written != index_.size())
268 return false;
269 base::MD5Update(&context,
270 base::StringPiece(
271 reinterpret_cast<const char*>(&(index_[0])),
272 index_bytes));
275 if (deltas_.size()) {
276 const size_t deltas_bytes = sizeof(deltas_[0]) * deltas_.size();
277 written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(),
278 file.get());
279 if (written != deltas_.size())
280 return false;
281 base::MD5Update(&context,
282 base::StringPiece(
283 reinterpret_cast<const char*>(&(deltas_[0])),
284 deltas_bytes));
287 base::MD5Digest digest;
288 base::MD5Final(&digest, &context);
289 written = fwrite(&digest, sizeof(digest), 1, file.get());
290 if (written != 1)
291 return false;
293 // TODO(shess): Can this code check that the close was successful?
294 file.reset();
296 return true;
299 } // namespace safe_browsing