1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
10 #include "base/bind.h"
11 #include "base/file_util.h"
12 #include "base/message_loop/message_loop.h"
13 #include "base/metrics/histogram.h"
14 #include "base/metrics/stats_counters.h"
15 #include "base/process/process.h"
16 #include "base/process/process_metrics.h"
17 #include "base/sha1.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/strings/stringprintf.h"
20 #include "base/time/time.h"
21 #include "chrome/browser/safe_browsing/prefix_set.h"
22 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
23 #include "content/public/browser/browser_thread.h"
24 #include "crypto/sha2.h"
25 #include "net/base/net_util.h"
28 #if defined(OS_MACOSX)
29 #include "base/mac/mac_util.h"
32 using content::BrowserThread
;
36 // Filename suffix for the bloom filter.
37 const base::FilePath::CharType kBloomFilterFile
[] =
38 FILE_PATH_LITERAL(" Filter 2");
39 // Filename suffix for the prefix set.
40 const base::FilePath::CharType kPrefixSetFile
[] =
41 FILE_PATH_LITERAL(" Prefix Set");
42 // Filename suffix for download store.
43 const base::FilePath::CharType kDownloadDBFile
[] =
44 FILE_PATH_LITERAL(" Download");
45 // Filename suffix for client-side phishing detection whitelist store.
46 const base::FilePath::CharType kCsdWhitelistDBFile
[] =
47 FILE_PATH_LITERAL(" Csd Whitelist");
48 // Filename suffix for the download whitelist store.
49 const base::FilePath::CharType kDownloadWhitelistDBFile
[] =
50 FILE_PATH_LITERAL(" Download Whitelist");
51 // Filename suffix for the extension blacklist store.
52 const base::FilePath::CharType kExtensionBlacklistDBFile
[] =
53 FILE_PATH_LITERAL(" Extension Blacklist");
54 // Filename suffix for the side-effect free whitelist store.
55 const base::FilePath::CharType kSideEffectFreeWhitelistDBFile
[] =
56 FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
57 // Filename suffix for the csd malware IP blacklist store.
58 const base::FilePath::CharType kIPBlacklistDBFile
[] =
59 FILE_PATH_LITERAL(" IP Blacklist");
61 // Filename suffix for browse store.
62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
63 // Unfortunately, to change the name implies lots of transition code
64 // for little benefit. If/when file formats change (say to put all
// the data in one file), that would be a convenient point to rectify
// this.
67 const base::FilePath::CharType kBrowseDBFile
[] = FILE_PATH_LITERAL(" Bloom");
69 // The maximum staleness for a cached entry.
70 const int kMaxStalenessMinutes
= 45;
72 // Maximum number of entries we allow in any of the whitelists.
73 // If a whitelist on disk contains more entries then all lookups to
74 // the whitelist will be considered a match.
75 const size_t kMaxWhitelistSize
= 5000;
77 // If the hash of this exact expression is on a whitelist then all
78 // lookups to this whitelist will be considered a match.
79 const char kWhitelistKillSwitchUrl
[] =
80 "sb-ssl.google.com/safebrowsing/csd/killswitch"; // Don't change this!
82 // If the hash of this exact expression is on a whitelist then the
83 // malware IP blacklisting feature will be disabled in csd.
85 const char kMalwareIPKillSwitchUrl
[] =
86 "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";
88 const size_t kMaxIpPrefixSize
= 128;
89 const size_t kMinIpPrefixSize
= 1;
91 // To save space, the incoming |chunk_id| and |list_id| are combined
92 // into an |encoded_chunk_id| for storage by shifting the |list_id|
93 // into the low-order bits. These functions decode that information.
94 // TODO(lzheng): It was reasonable when database is saved in sqlite, but
95 // there should be better ways to save chunk_id and list_id after we use
96 // SafeBrowsingStoreFile.
// Extracts the list-id bit from |encoded_chunk_id|, i.e. the lowest-order
// bit of the encoded value.
int GetListIdBit(const int encoded_chunk_id) {
  const int kListIdMask = 1;
  return encoded_chunk_id & kListIdMask;
}
// Recovers the chunk id from |encoded_chunk_id| by shifting out the
// low-order list-id bit.
int DecodeChunkId(int encoded_chunk_id) {
  const int kListIdBits = 1;
  return encoded_chunk_id >> kListIdBits;
}
103 int EncodeChunkId(const int chunk
, const int list_id
) {
104 DCHECK_NE(list_id
, safe_browsing_util::INVALID
);
105 return chunk
<< 1 | list_id
% 2;
108 // Generate the set of full hashes to check for |url|. If
109 // |include_whitelist_hashes| is true we will generate additional path-prefixes
110 // to match against the csd whitelist. E.g., if the path-prefix /foo is on the
111 // whitelist it should also match /foo/bar which is not the case for all the
112 // other lists. We'll also always add a pattern for the empty path.
113 // TODO(shess): This function is almost the same as
114 // |CompareFullHashes()| in safe_browsing_util.cc, except that code
115 // does an early exit on match. Since match should be the infrequent
// case (phishing or malware found), consider combining this function
// with that one.
118 void BrowseFullHashesToCheck(const GURL
& url
,
119 bool include_whitelist_hashes
,
120 std::vector
<SBFullHash
>* full_hashes
) {
121 std::vector
<std::string
> hosts
;
122 if (url
.HostIsIPAddress()) {
123 hosts
.push_back(url
.host());
125 safe_browsing_util::GenerateHostsToCheck(url
, &hosts
);
128 std::vector
<std::string
> paths
;
129 safe_browsing_util::GeneratePathsToCheck(url
, &paths
);
131 for (size_t i
= 0; i
< hosts
.size(); ++i
) {
132 for (size_t j
= 0; j
< paths
.size(); ++j
) {
133 const std::string
& path
= paths
[j
];
134 full_hashes
->push_back(SBFullHashForString(hosts
[i
] + path
));
136 // We may have /foo as path-prefix in the whitelist which should
137 // also match with /foo/bar and /foo?bar. Hence, for every path
138 // that ends in '/' we also add the path without the slash.
139 if (include_whitelist_hashes
&&
141 path
[path
.size() - 1] == '/') {
142 full_hashes
->push_back(
143 SBFullHashForString(hosts
[i
] + path
.substr(0, path
.size() - 1)));
149 // Get the prefixes matching the download |urls|.
150 void GetDownloadUrlPrefixes(const std::vector
<GURL
>& urls
,
151 std::vector
<SBPrefix
>* prefixes
) {
152 std::vector
<SBFullHash
> full_hashes
;
153 for (size_t i
= 0; i
< urls
.size(); ++i
)
154 BrowseFullHashesToCheck(urls
[i
], false, &full_hashes
);
156 for (size_t i
= 0; i
< full_hashes
.size(); ++i
)
157 prefixes
->push_back(full_hashes
[i
].prefix
);
160 // Helper function to compare addprefixes in |store| with |prefixes|.
161 // The |list_bit| indicates which list (url or hash) to compare.
163 // Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
164 // the actual matching prefixes.
165 bool MatchAddPrefixes(SafeBrowsingStore
* store
,
167 const std::vector
<SBPrefix
>& prefixes
,
168 std::vector
<SBPrefix
>* prefix_hits
) {
169 prefix_hits
->clear();
170 bool found_match
= false;
172 SBAddPrefixes add_prefixes
;
173 store
->GetAddPrefixes(&add_prefixes
);
174 for (SBAddPrefixes::const_iterator iter
= add_prefixes
.begin();
175 iter
!= add_prefixes
.end(); ++iter
) {
176 for (size_t j
= 0; j
< prefixes
.size(); ++j
) {
177 const SBPrefix
& prefix
= prefixes
[j
];
178 if (prefix
== iter
->prefix
&&
179 GetListIdBit(iter
->chunk_id
) == list_bit
) {
180 prefix_hits
->push_back(prefix
);
188 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and
189 // add them to |full_hits| if not expired. "Not expired" is when
190 // either |last_update| was recent enough, or the item has been
191 // received recently enough. Expired items are not deleted because a
192 // future update may make them acceptable again.
194 // For efficiency reasons the code walks |prefix_hits| and
195 // |full_hashes| in parallel, so they must be sorted by prefix.
196 void GetCachedFullHashesForBrowse(const std::vector
<SBPrefix
>& prefix_hits
,
197 const std::vector
<SBAddFullHash
>& full_hashes
,
198 std::vector
<SBFullHashResult
>* full_hits
,
199 base::Time last_update
) {
200 const base::Time expire_time
=
201 base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes
);
203 std::vector
<SBPrefix
>::const_iterator piter
= prefix_hits
.begin();
204 std::vector
<SBAddFullHash
>::const_iterator hiter
= full_hashes
.begin();
206 while (piter
!= prefix_hits
.end() && hiter
!= full_hashes
.end()) {
207 if (*piter
< hiter
->full_hash
.prefix
) {
209 } else if (hiter
->full_hash
.prefix
< *piter
) {
212 if (expire_time
< last_update
||
213 expire_time
.ToTimeT() < hiter
->received
) {
214 SBFullHashResult result
;
215 const int list_bit
= GetListIdBit(hiter
->chunk_id
);
216 DCHECK(list_bit
== safe_browsing_util::MALWARE
||
217 list_bit
== safe_browsing_util::PHISH
);
218 const safe_browsing_util::ListType list_id
=
219 static_cast<safe_browsing_util::ListType
>(list_bit
);
220 if (!safe_browsing_util::GetListName(list_id
, &result
.list_name
))
222 result
.add_chunk_id
= DecodeChunkId(hiter
->chunk_id
);
223 result
.hash
= hiter
->full_hash
;
224 full_hits
->push_back(result
);
227 // Only increment |hiter|, |piter| might have multiple hits.
233 // This function generates a chunk range string for |chunks|. It
234 // outputs one chunk range string per list and writes it to the
235 // |list_ranges| vector. We expect |list_ranges| to already be of the
236 // right size. E.g., if |chunks| contains chunks with two different
237 // list ids then |list_ranges| must contain two elements.
238 void GetChunkRanges(const std::vector
<int>& chunks
,
239 std::vector
<std::string
>* list_ranges
) {
240 // Since there are 2 possible list ids, there must be exactly two
241 // list ranges. Even if the chunk data should only contain one
242 // line, this code has to somehow handle corruption.
243 DCHECK_EQ(2U, list_ranges
->size());
245 std::vector
<std::vector
<int> > decoded_chunks(list_ranges
->size());
246 for (std::vector
<int>::const_iterator iter
= chunks
.begin();
247 iter
!= chunks
.end(); ++iter
) {
248 int mod_list_id
= GetListIdBit(*iter
);
249 DCHECK_GE(mod_list_id
, 0);
250 DCHECK_LT(static_cast<size_t>(mod_list_id
), decoded_chunks
.size());
251 decoded_chunks
[mod_list_id
].push_back(DecodeChunkId(*iter
));
253 for (size_t i
= 0; i
< decoded_chunks
.size(); ++i
) {
254 ChunksToRangeString(decoded_chunks
[i
], &((*list_ranges
)[i
]));
258 // Helper function to create chunk range lists for Browse related
260 void UpdateChunkRanges(SafeBrowsingStore
* store
,
261 const std::vector
<std::string
>& listnames
,
262 std::vector
<SBListChunkRanges
>* lists
) {
266 DCHECK_GT(listnames
.size(), 0U);
267 DCHECK_LE(listnames
.size(), 2U);
268 std::vector
<int> add_chunks
;
269 std::vector
<int> sub_chunks
;
270 store
->GetAddChunks(&add_chunks
);
271 store
->GetSubChunks(&sub_chunks
);
273 // Always decode 2 ranges, even if only the first one is expected.
274 // The loop below will only load as many into |lists| as |listnames|
276 std::vector
<std::string
> adds(2);
277 std::vector
<std::string
> subs(2);
278 GetChunkRanges(add_chunks
, &adds
);
279 GetChunkRanges(sub_chunks
, &subs
);
281 for (size_t i
= 0; i
< listnames
.size(); ++i
) {
282 const std::string
& listname
= listnames
[i
];
283 DCHECK_EQ(safe_browsing_util::GetListId(listname
) % 2,
284 static_cast<int>(i
% 2));
285 DCHECK_NE(safe_browsing_util::GetListId(listname
),
286 safe_browsing_util::INVALID
);
287 lists
->push_back(SBListChunkRanges(listname
));
288 lists
->back().adds
.swap(adds
[i
]);
289 lists
->back().subs
.swap(subs
[i
]);
293 void UpdateChunkRangesForLists(SafeBrowsingStore
* store
,
294 const std::string
& listname0
,
295 const std::string
& listname1
,
296 std::vector
<SBListChunkRanges
>* lists
) {
297 std::vector
<std::string
> listnames
;
298 listnames
.push_back(listname0
);
299 listnames
.push_back(listname1
);
300 UpdateChunkRanges(store
, listnames
, lists
);
303 void UpdateChunkRangesForList(SafeBrowsingStore
* store
,
304 const std::string
& listname
,
305 std::vector
<SBListChunkRanges
>* lists
) {
306 UpdateChunkRanges(store
, std::vector
<std::string
>(1, listname
), lists
);
309 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from
310 // safe_browsing_store.h orders on both chunk-id and prefix.
311 bool SBAddFullHashPrefixLess(const SBAddFullHash
& a
, const SBAddFullHash
& b
) {
312 return a
.full_hash
.prefix
< b
.full_hash
.prefix
;
315 // This code always checks for non-zero file size. This helper makes
316 // that less verbose.
317 int64
GetFileSizeOrZero(const base::FilePath
& file_path
) {
319 if (!base::GetFileSize(file_path
, &size_64
))
324 // Used to order whitelist storage in memory.
325 bool SBFullHashLess(const SBFullHash
& a
, const SBFullHash
& b
) {
326 return memcmp(a
.full_hash
, b
.full_hash
, sizeof(a
.full_hash
)) < 0;
331 // The default SafeBrowsingDatabaseFactory.
332 class SafeBrowsingDatabaseFactoryImpl
: public SafeBrowsingDatabaseFactory
{
334 virtual SafeBrowsingDatabase
* CreateSafeBrowsingDatabase(
335 bool enable_download_protection
,
336 bool enable_client_side_whitelist
,
337 bool enable_download_whitelist
,
338 bool enable_extension_blacklist
,
339 bool enable_side_effect_free_whitelist
,
340 bool enable_ip_blacklist
) OVERRIDE
{
341 return new SafeBrowsingDatabaseNew(
342 new SafeBrowsingStoreFile
,
343 enable_download_protection
? new SafeBrowsingStoreFile
: NULL
,
344 enable_client_side_whitelist
? new SafeBrowsingStoreFile
: NULL
,
345 enable_download_whitelist
? new SafeBrowsingStoreFile
: NULL
,
346 enable_extension_blacklist
? new SafeBrowsingStoreFile
: NULL
,
347 enable_side_effect_free_whitelist
? new SafeBrowsingStoreFile
: NULL
,
348 enable_ip_blacklist
? new SafeBrowsingStoreFile
: NULL
);
351 SafeBrowsingDatabaseFactoryImpl() { }
354 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl
);
358 SafeBrowsingDatabaseFactory
* SafeBrowsingDatabase::factory_
= NULL
;
360 // Factory method, non-thread safe. Caller has to make sure this s called
361 // on SafeBrowsing Thread.
362 // TODO(shess): There's no need for a factory any longer. Convert
363 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
364 // callers just construct things directly.
365 SafeBrowsingDatabase
* SafeBrowsingDatabase::Create(
366 bool enable_download_protection
,
367 bool enable_client_side_whitelist
,
368 bool enable_download_whitelist
,
369 bool enable_extension_blacklist
,
370 bool enable_side_effect_free_whitelist
,
371 bool enable_ip_blacklist
) {
373 factory_
= new SafeBrowsingDatabaseFactoryImpl();
374 return factory_
->CreateSafeBrowsingDatabase(
375 enable_download_protection
,
376 enable_client_side_whitelist
,
377 enable_download_whitelist
,
378 enable_extension_blacklist
,
379 enable_side_effect_free_whitelist
,
380 enable_ip_blacklist
);
383 SafeBrowsingDatabase::~SafeBrowsingDatabase() {
387 base::FilePath
SafeBrowsingDatabase::BrowseDBFilename(
388 const base::FilePath
& db_base_filename
) {
389 return base::FilePath(db_base_filename
.value() + kBrowseDBFile
);
393 base::FilePath
SafeBrowsingDatabase::DownloadDBFilename(
394 const base::FilePath
& db_base_filename
) {
395 return base::FilePath(db_base_filename
.value() + kDownloadDBFile
);
399 base::FilePath
SafeBrowsingDatabase::BloomFilterForFilename(
400 const base::FilePath
& db_filename
) {
401 return base::FilePath(db_filename
.value() + kBloomFilterFile
);
405 base::FilePath
SafeBrowsingDatabase::PrefixSetForFilename(
406 const base::FilePath
& db_filename
) {
407 return base::FilePath(db_filename
.value() + kPrefixSetFile
);
411 base::FilePath
SafeBrowsingDatabase::CsdWhitelistDBFilename(
412 const base::FilePath
& db_filename
) {
413 return base::FilePath(db_filename
.value() + kCsdWhitelistDBFile
);
417 base::FilePath
SafeBrowsingDatabase::DownloadWhitelistDBFilename(
418 const base::FilePath
& db_filename
) {
419 return base::FilePath(db_filename
.value() + kDownloadWhitelistDBFile
);
423 base::FilePath
SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
424 const base::FilePath
& db_filename
) {
425 return base::FilePath(db_filename
.value() + kExtensionBlacklistDBFile
);
429 base::FilePath
SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
430 const base::FilePath
& db_filename
) {
431 return base::FilePath(db_filename
.value() + kSideEffectFreeWhitelistDBFile
);
435 base::FilePath
SafeBrowsingDatabase::IpBlacklistDBFilename(
436 const base::FilePath
& db_filename
) {
437 return base::FilePath(db_filename
.value() + kIPBlacklistDBFile
);
440 SafeBrowsingStore
* SafeBrowsingDatabaseNew::GetStore(const int list_id
) {
441 if (list_id
== safe_browsing_util::PHISH
||
442 list_id
== safe_browsing_util::MALWARE
) {
443 return browse_store_
.get();
444 } else if (list_id
== safe_browsing_util::BINURL
) {
445 return download_store_
.get();
446 } else if (list_id
== safe_browsing_util::CSDWHITELIST
) {
447 return csd_whitelist_store_
.get();
448 } else if (list_id
== safe_browsing_util::DOWNLOADWHITELIST
) {
449 return download_whitelist_store_
.get();
450 } else if (list_id
== safe_browsing_util::EXTENSIONBLACKLIST
) {
451 return extension_blacklist_store_
.get();
452 } else if (list_id
== safe_browsing_util::SIDEEFFECTFREEWHITELIST
) {
453 return side_effect_free_whitelist_store_
.get();
454 } else if (list_id
== safe_browsing_util::IPBLACKLIST
) {
455 return ip_blacklist_store_
.get();
461 void SafeBrowsingDatabase::RecordFailure(FailureType failure_type
) {
462 UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type
,
463 FAILURE_DATABASE_MAX
);
466 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
467 : creation_loop_(base::MessageLoop::current()),
468 browse_store_(new SafeBrowsingStoreFile
),
469 reset_factory_(this),
470 corruption_detected_(false),
471 change_detected_(false) {
472 DCHECK(browse_store_
.get());
473 DCHECK(!download_store_
.get());
474 DCHECK(!csd_whitelist_store_
.get());
475 DCHECK(!download_whitelist_store_
.get());
476 DCHECK(!extension_blacklist_store_
.get());
477 DCHECK(!side_effect_free_whitelist_store_
.get());
478 DCHECK(!ip_blacklist_store_
.get());
481 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
482 SafeBrowsingStore
* browse_store
,
483 SafeBrowsingStore
* download_store
,
484 SafeBrowsingStore
* csd_whitelist_store
,
485 SafeBrowsingStore
* download_whitelist_store
,
486 SafeBrowsingStore
* extension_blacklist_store
,
487 SafeBrowsingStore
* side_effect_free_whitelist_store
,
488 SafeBrowsingStore
* ip_blacklist_store
)
489 : creation_loop_(base::MessageLoop::current()),
490 browse_store_(browse_store
),
491 download_store_(download_store
),
492 csd_whitelist_store_(csd_whitelist_store
),
493 download_whitelist_store_(download_whitelist_store
),
494 extension_blacklist_store_(extension_blacklist_store
),
495 side_effect_free_whitelist_store_(side_effect_free_whitelist_store
),
496 ip_blacklist_store_(ip_blacklist_store
),
497 reset_factory_(this),
498 corruption_detected_(false) {
499 DCHECK(browse_store_
.get());
502 SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
503 // The DCHECK is disabled due to crbug.com/338486 .
504 // DCHECK_EQ(creation_loop_, base::MessageLoop::current());
507 void SafeBrowsingDatabaseNew::Init(const base::FilePath
& filename_base
) {
508 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
509 // Ensure we haven't been run before.
510 DCHECK(browse_filename_
.empty());
511 DCHECK(download_filename_
.empty());
512 DCHECK(csd_whitelist_filename_
.empty());
513 DCHECK(download_whitelist_filename_
.empty());
514 DCHECK(extension_blacklist_filename_
.empty());
515 DCHECK(side_effect_free_whitelist_filename_
.empty());
516 DCHECK(ip_blacklist_filename_
.empty());
518 browse_filename_
= BrowseDBFilename(filename_base
);
519 browse_prefix_set_filename_
= PrefixSetForFilename(browse_filename_
);
523 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
524 base::Unretained(this)));
525 DVLOG(1) << "Init browse store: " << browse_filename_
.value();
528 // NOTE: There is no need to grab the lock in this function, since
529 // until it returns, there are no pointers to this class on other
530 // threads. Then again, that means there is no possibility of
531 // contention on the lock...
532 base::AutoLock
locked(lookup_lock_
);
533 full_browse_hashes_
.clear();
534 pending_browse_hashes_
.clear();
538 if (download_store_
.get()) {
539 download_filename_
= DownloadDBFilename(filename_base
);
540 download_store_
->Init(
542 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
543 base::Unretained(this)));
544 DVLOG(1) << "Init download store: " << download_filename_
.value();
547 if (csd_whitelist_store_
.get()) {
548 csd_whitelist_filename_
= CsdWhitelistDBFilename(filename_base
);
549 csd_whitelist_store_
->Init(
550 csd_whitelist_filename_
,
551 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
552 base::Unretained(this)));
553 DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_
.value();
554 std::vector
<SBAddFullHash
> full_hashes
;
555 if (csd_whitelist_store_
->GetAddFullHashes(&full_hashes
)) {
556 LoadWhitelist(full_hashes
, &csd_whitelist_
);
558 WhitelistEverything(&csd_whitelist_
);
561 WhitelistEverything(&csd_whitelist_
); // Just to be safe.
564 if (download_whitelist_store_
.get()) {
565 download_whitelist_filename_
= DownloadWhitelistDBFilename(filename_base
);
566 download_whitelist_store_
->Init(
567 download_whitelist_filename_
,
568 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
569 base::Unretained(this)));
570 DVLOG(1) << "Init download whitelist store: "
571 << download_whitelist_filename_
.value();
572 std::vector
<SBAddFullHash
> full_hashes
;
573 if (download_whitelist_store_
->GetAddFullHashes(&full_hashes
)) {
574 LoadWhitelist(full_hashes
, &download_whitelist_
);
576 WhitelistEverything(&download_whitelist_
);
579 WhitelistEverything(&download_whitelist_
); // Just to be safe.
582 if (extension_blacklist_store_
.get()) {
583 extension_blacklist_filename_
= ExtensionBlacklistDBFilename(filename_base
);
584 extension_blacklist_store_
->Init(
585 extension_blacklist_filename_
,
586 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
587 base::Unretained(this)));
588 DVLOG(1) << "Init extension blacklist store: "
589 << extension_blacklist_filename_
.value();
592 if (side_effect_free_whitelist_store_
.get()) {
593 side_effect_free_whitelist_filename_
=
594 SideEffectFreeWhitelistDBFilename(filename_base
);
595 side_effect_free_whitelist_prefix_set_filename_
=
596 PrefixSetForFilename(side_effect_free_whitelist_filename_
);
597 side_effect_free_whitelist_store_
->Init(
598 side_effect_free_whitelist_filename_
,
599 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
600 base::Unretained(this)));
601 DVLOG(1) << "Init side-effect free whitelist store: "
602 << side_effect_free_whitelist_filename_
.value();
604 // If there is no database, the filter cannot be used.
605 base::File::Info db_info
;
606 if (base::GetFileInfo(side_effect_free_whitelist_filename_
, &db_info
)
607 && db_info
.size
!= 0) {
608 const base::TimeTicks before
= base::TimeTicks::Now();
609 side_effect_free_whitelist_prefix_set_
=
610 safe_browsing::PrefixSet::LoadFile(
611 side_effect_free_whitelist_prefix_set_filename_
);
612 DVLOG(1) << "SafeBrowsingDatabaseNew read side-effect free whitelist "
614 << (base::TimeTicks::Now() - before
).InMilliseconds() << " ms";
615 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
616 base::TimeTicks::Now() - before
);
617 if (!side_effect_free_whitelist_prefix_set_
.get())
618 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ
);
    // Delete any files of the side-effect free whitelist that may be around
622 // from when it was previously enabled.
623 SafeBrowsingStoreFile::DeleteStore(
624 SideEffectFreeWhitelistDBFilename(filename_base
));
627 if (ip_blacklist_store_
.get()) {
628 ip_blacklist_filename_
= IpBlacklistDBFilename(filename_base
);
629 ip_blacklist_store_
->Init(
630 ip_blacklist_filename_
,
631 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
632 base::Unretained(this)));
633 DVLOG(1) << "SafeBrowsingDatabaseNew read ip blacklist: "
634 << ip_blacklist_filename_
.value();
635 std::vector
<SBAddFullHash
> full_hashes
;
636 if (ip_blacklist_store_
->GetAddFullHashes(&full_hashes
)) {
637 LoadIpBlacklist(full_hashes
);
639 DVLOG(1) << "Unable to load full hashes from the IP blacklist.";
640 LoadIpBlacklist(std::vector
<SBAddFullHash
>()); // Clear the list.
645 bool SafeBrowsingDatabaseNew::ResetDatabase() {
646 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
648 // Delete files on disk.
649 // TODO(shess): Hard to see where one might want to delete without a
650 // reset. Perhaps inline |Delete()|?
654 // Reset objects in memory.
656 base::AutoLock
locked(lookup_lock_
);
657 full_browse_hashes_
.clear();
658 pending_browse_hashes_
.clear();
659 prefix_miss_cache_
.clear();
660 browse_prefix_set_
.reset();
661 side_effect_free_whitelist_prefix_set_
.reset();
662 ip_blacklist_
.clear();
664 // Wants to acquire the lock itself.
665 WhitelistEverything(&csd_whitelist_
);
666 WhitelistEverything(&download_whitelist_
);
670 // TODO(lzheng): Remove matching_list, it is not used anywhere.
671 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
673 std::string
* matching_list
,
674 std::vector
<SBPrefix
>* prefix_hits
,
675 std::vector
<SBFullHashResult
>* full_hits
,
676 base::Time last_update
) {
677 // Clear the results first.
678 matching_list
->clear();
679 prefix_hits
->clear();
682 std::vector
<SBFullHash
> full_hashes
;
683 BrowseFullHashesToCheck(url
, false, &full_hashes
);
684 if (full_hashes
.empty())
687 // This function is called on the I/O thread, prevent changes to
688 // filter and caches.
689 base::AutoLock
locked(lookup_lock_
);
691 // |browse_prefix_set_| is empty until it is either read from disk, or the
  // first update populates it.  Bail out without a hit if not yet
  // available.
694 if (!browse_prefix_set_
.get())
697 size_t miss_count
= 0;
698 for (size_t i
= 0; i
< full_hashes
.size(); ++i
) {
699 const SBPrefix prefix
= full_hashes
[i
].prefix
;
700 if (browse_prefix_set_
->Exists(prefix
)) {
701 prefix_hits
->push_back(prefix
);
702 if (prefix_miss_cache_
.count(prefix
) > 0)
707 // If all the prefixes are cached as 'misses', don't issue a GetHash.
708 if (miss_count
== prefix_hits
->size())
711 // Find the matching full-hash results. |full_browse_hashes_| are from the
  // database, |pending_browse_hashes_| are from GetHash requests between
  // updates.
714 std::sort(prefix_hits
->begin(), prefix_hits
->end());
716 GetCachedFullHashesForBrowse(*prefix_hits
, full_browse_hashes_
,
717 full_hits
, last_update
);
718 GetCachedFullHashesForBrowse(*prefix_hits
, pending_browse_hashes_
,
719 full_hits
, last_update
);
723 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
724 const std::vector
<GURL
>& urls
,
725 std::vector
<SBPrefix
>* prefix_hits
) {
726 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
728 // Ignore this check when download checking is not enabled.
729 if (!download_store_
.get())
732 std::vector
<SBPrefix
> prefixes
;
733 GetDownloadUrlPrefixes(urls
, &prefixes
);
734 return MatchAddPrefixes(download_store_
.get(),
735 safe_browsing_util::BINURL
% 2,
740 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL
& url
) {
741 // This method is theoretically thread-safe but we expect all calls to
742 // originate from the IO thread.
743 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
744 std::vector
<SBFullHash
> full_hashes
;
745 BrowseFullHashesToCheck(url
, true, &full_hashes
);
746 return ContainsWhitelistedHashes(csd_whitelist_
, full_hashes
);
749 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL
& url
) {
750 std::vector
<SBFullHash
> full_hashes
;
751 BrowseFullHashesToCheck(url
, true, &full_hashes
);
752 return ContainsWhitelistedHashes(download_whitelist_
, full_hashes
);
755 bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
756 const std::vector
<SBPrefix
>& prefixes
,
757 std::vector
<SBPrefix
>* prefix_hits
) {
758 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
759 if (!extension_blacklist_store_
)
762 return MatchAddPrefixes(extension_blacklist_store_
.get(),
763 safe_browsing_util::EXTENSIONBLACKLIST
% 2,
768 bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
773 safe_browsing_util::CanonicalizeUrl(url
, &host
, &path
, &query
);
774 std::string url_to_check
= host
+ path
;
776 url_to_check
+= "?" + query
;
777 SBFullHash full_hash
= SBFullHashForString(url_to_check
);
779 // This function can be called on any thread, so lock against any changes
780 base::AutoLock
locked(lookup_lock_
);
782 // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
783 // from disk, or the first update populates it. Bail out without a hit if
784 // not yet available.
785 if (!side_effect_free_whitelist_prefix_set_
.get())
788 return side_effect_free_whitelist_prefix_set_
->Exists(full_hash
.prefix
);
791 bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string
& ip_address
) {
792 net::IPAddressNumber ip_number
;
793 if (!net::ParseIPLiteralToNumber(ip_address
, &ip_number
)) {
794 DVLOG(2) << "Unable to parse IP address: '" << ip_address
<< "'";
797 if (ip_number
.size() == net::kIPv4AddressSize
) {
798 ip_number
= net::ConvertIPv4NumberToIPv6Number(ip_number
);
800 if (ip_number
.size() != net::kIPv6AddressSize
) {
801 DVLOG(2) << "Unable to convert IPv4 address to IPv6: '"
802 << ip_address
<< "'";
803 return false; // better safe than sorry.
805 // This function can be called from any thread.
806 base::AutoLock
locked(lookup_lock_
);
807 for (IPBlacklist::const_iterator it
= ip_blacklist_
.begin();
808 it
!= ip_blacklist_
.end();
810 const std::string
& mask
= it
->first
;
811 DCHECK_EQ(mask
.size(), ip_number
.size());
812 std::string
subnet(net::kIPv6AddressSize
, '\0');
813 for (size_t i
= 0; i
< net::kIPv6AddressSize
; ++i
) {
814 subnet
[i
] = ip_number
[i
] & mask
[i
];
816 const std::string hash
= base::SHA1HashString(subnet
);
817 DVLOG(2) << "Lookup Malware IP: "
818 << " ip:" << ip_address
819 << " mask:" << base::HexEncode(mask
.data(), mask
.size())
820 << " subnet:" << base::HexEncode(subnet
.data(), subnet
.size())
821 << " hash:" << base::HexEncode(hash
.data(), hash
.size());
822 if (it
->second
.count(hash
) > 0) {
829 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
830 const std::string
& str
) {
831 std::vector
<SBFullHash
> hashes
;
832 hashes
.push_back(SBFullHashForString(str
));
833 return ContainsWhitelistedHashes(download_whitelist_
, hashes
);
836 bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
837 const SBWhitelist
& whitelist
,
838 const std::vector
<SBFullHash
>& hashes
) {
839 base::AutoLock
l(lookup_lock_
);
840 if (whitelist
.second
)
842 for (std::vector
<SBFullHash
>::const_iterator it
= hashes
.begin();
843 it
!= hashes
.end(); ++it
) {
844 if (std::binary_search(whitelist
.first
.begin(), whitelist
.first
.end(),
845 *it
, SBFullHashLess
)) {
852 // Helper to insert entries for all of the prefixes or full hashes in
853 // |entry| into the store.
854 void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id
, SBPrefix host
,
855 const SBEntry
* entry
, int list_id
) {
856 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
858 SafeBrowsingStore
* store
= GetStore(list_id
);
861 STATS_COUNTER("SB.HostInsert", 1);
862 const int encoded_chunk_id
= EncodeChunkId(chunk_id
, list_id
);
863 const int count
= entry
->prefix_count();
865 DCHECK(!entry
->IsSub());
867 // No prefixes, use host instead.
868 STATS_COUNTER("SB.PrefixAdd", 1);
869 store
->WriteAddPrefix(encoded_chunk_id
, host
);
870 } else if (entry
->IsPrefix()) {
872 for (int i
= 0; i
< count
; i
++) {
873 const SBPrefix prefix
= entry
->PrefixAt(i
);
874 STATS_COUNTER("SB.PrefixAdd", 1);
875 store
->WriteAddPrefix(encoded_chunk_id
, prefix
);
878 // Prefixes and hashes.
879 const base::Time receive_time
= base::Time::Now();
880 for (int i
= 0; i
< count
; ++i
) {
881 const SBFullHash full_hash
= entry
->FullHashAt(i
);
882 const SBPrefix prefix
= full_hash
.prefix
;
884 STATS_COUNTER("SB.PrefixAdd", 1);
885 store
->WriteAddPrefix(encoded_chunk_id
, prefix
);
887 STATS_COUNTER("SB.PrefixAddFull", 1);
888 store
->WriteAddHash(encoded_chunk_id
, receive_time
, full_hash
);
893 // Helper to iterate over all the entries in the hosts in |chunks| and
894 // add them to the store.
895 void SafeBrowsingDatabaseNew::InsertAddChunks(
896 const safe_browsing_util::ListType list_id
,
897 const SBChunkList
& chunks
) {
898 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
900 SafeBrowsingStore
* store
= GetStore(list_id
);
903 for (SBChunkList::const_iterator citer
= chunks
.begin();
904 citer
!= chunks
.end(); ++citer
) {
905 const int chunk_id
= citer
->chunk_number
;
907 // The server can give us a chunk that we already have because
908 // it's part of a range. Don't add it again.
909 const int encoded_chunk_id
= EncodeChunkId(chunk_id
, list_id
);
910 if (store
->CheckAddChunk(encoded_chunk_id
))
913 store
->SetAddChunk(encoded_chunk_id
);
914 for (std::deque
<SBChunkHost
>::const_iterator hiter
= citer
->hosts
.begin();
915 hiter
!= citer
->hosts
.end(); ++hiter
) {
916 // NOTE: Could pass |encoded_chunk_id|, but then inserting add
917 // chunks would look different from inserting sub chunks.
918 InsertAdd(chunk_id
, hiter
->host
, hiter
->entry
, list_id
);
923 // Helper to insert entries for all of the prefixes or full hashes in
924 // |entry| into the store.
925 void SafeBrowsingDatabaseNew::InsertSub(int chunk_id
, SBPrefix host
,
926 const SBEntry
* entry
, int list_id
) {
927 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
929 SafeBrowsingStore
* store
= GetStore(list_id
);
932 STATS_COUNTER("SB.HostDelete", 1);
933 const int encoded_chunk_id
= EncodeChunkId(chunk_id
, list_id
);
934 const int count
= entry
->prefix_count();
936 DCHECK(entry
->IsSub());
938 // No prefixes, use host instead.
939 STATS_COUNTER("SB.PrefixSub", 1);
940 const int add_chunk_id
= EncodeChunkId(entry
->chunk_id(), list_id
);
941 store
->WriteSubPrefix(encoded_chunk_id
, add_chunk_id
, host
);
942 } else if (entry
->IsPrefix()) {
944 for (int i
= 0; i
< count
; i
++) {
945 const SBPrefix prefix
= entry
->PrefixAt(i
);
946 const int add_chunk_id
=
947 EncodeChunkId(entry
->ChunkIdAtPrefix(i
), list_id
);
949 STATS_COUNTER("SB.PrefixSub", 1);
950 store
->WriteSubPrefix(encoded_chunk_id
, add_chunk_id
, prefix
);
953 // Prefixes and hashes.
954 for (int i
= 0; i
< count
; ++i
) {
955 const SBFullHash full_hash
= entry
->FullHashAt(i
);
956 const int add_chunk_id
=
957 EncodeChunkId(entry
->ChunkIdAtPrefix(i
), list_id
);
959 STATS_COUNTER("SB.PrefixSub", 1);
960 store
->WriteSubPrefix(encoded_chunk_id
, add_chunk_id
, full_hash
.prefix
);
962 STATS_COUNTER("SB.PrefixSubFull", 1);
963 store
->WriteSubHash(encoded_chunk_id
, add_chunk_id
, full_hash
);
968 // Helper to iterate over all the entries in the hosts in |chunks| and
969 // add them to the store.
970 void SafeBrowsingDatabaseNew::InsertSubChunks(
971 safe_browsing_util::ListType list_id
,
972 const SBChunkList
& chunks
) {
973 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
975 SafeBrowsingStore
* store
= GetStore(list_id
);
978 for (SBChunkList::const_iterator citer
= chunks
.begin();
979 citer
!= chunks
.end(); ++citer
) {
980 const int chunk_id
= citer
->chunk_number
;
982 // The server can give us a chunk that we already have because
983 // it's part of a range. Don't add it again.
984 const int encoded_chunk_id
= EncodeChunkId(chunk_id
, list_id
);
985 if (store
->CheckSubChunk(encoded_chunk_id
))
988 store
->SetSubChunk(encoded_chunk_id
);
989 for (std::deque
<SBChunkHost
>::const_iterator hiter
= citer
->hosts
.begin();
990 hiter
!= citer
->hosts
.end(); ++hiter
) {
991 InsertSub(chunk_id
, hiter
->host
, hiter
->entry
, list_id
);
996 void SafeBrowsingDatabaseNew::InsertChunks(const std::string
& list_name
,
997 const SBChunkList
& chunks
) {
998 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1000 if (corruption_detected_
|| chunks
.empty())
1003 const base::TimeTicks before
= base::TimeTicks::Now();
1005 const safe_browsing_util::ListType list_id
=
1006 safe_browsing_util::GetListId(list_name
);
1007 DVLOG(2) << list_name
<< ": " << list_id
;
1009 SafeBrowsingStore
* store
= GetStore(list_id
);
1012 change_detected_
= true;
1014 store
->BeginChunk();
1015 if (chunks
.front().is_add
) {
1016 InsertAddChunks(list_id
, chunks
);
1018 InsertSubChunks(list_id
, chunks
);
1020 store
->FinishChunk();
1022 UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before
);
1025 void SafeBrowsingDatabaseNew::DeleteChunks(
1026 const std::vector
<SBChunkDelete
>& chunk_deletes
) {
1027 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1029 if (corruption_detected_
|| chunk_deletes
.empty())
1032 const std::string
& list_name
= chunk_deletes
.front().list_name
;
1033 const safe_browsing_util::ListType list_id
=
1034 safe_browsing_util::GetListId(list_name
);
1036 SafeBrowsingStore
* store
= GetStore(list_id
);
1039 change_detected_
= true;
1041 for (size_t i
= 0; i
< chunk_deletes
.size(); ++i
) {
1042 std::vector
<int> chunk_numbers
;
1043 RangesToChunks(chunk_deletes
[i
].chunk_del
, &chunk_numbers
);
1044 for (size_t j
= 0; j
< chunk_numbers
.size(); ++j
) {
1045 const int encoded_chunk_id
= EncodeChunkId(chunk_numbers
[j
], list_id
);
1046 if (chunk_deletes
[i
].is_sub_del
)
1047 store
->DeleteSubChunk(encoded_chunk_id
);
1049 store
->DeleteAddChunk(encoded_chunk_id
);
1054 void SafeBrowsingDatabaseNew::CacheHashResults(
1055 const std::vector
<SBPrefix
>& prefixes
,
1056 const std::vector
<SBFullHashResult
>& full_hits
) {
1057 // This is called on the I/O thread, lock against updates.
1058 base::AutoLock
locked(lookup_lock_
);
1060 if (full_hits
.empty()) {
1061 prefix_miss_cache_
.insert(prefixes
.begin(), prefixes
.end());
1065 // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
1066 // Refactor to make them identical.
1067 const base::Time now
= base::Time::Now();
1068 const size_t orig_size
= pending_browse_hashes_
.size();
1069 for (std::vector
<SBFullHashResult
>::const_iterator iter
= full_hits
.begin();
1070 iter
!= full_hits
.end(); ++iter
) {
1071 const int list_id
= safe_browsing_util::GetListId(iter
->list_name
);
1072 if (list_id
== safe_browsing_util::MALWARE
||
1073 list_id
== safe_browsing_util::PHISH
) {
1074 int encoded_chunk_id
= EncodeChunkId(iter
->add_chunk_id
, list_id
);
1075 SBAddFullHash
add_full_hash(encoded_chunk_id
, now
, iter
->hash
);
1076 pending_browse_hashes_
.push_back(add_full_hash
);
1080 // Sort new entries then merge with the previously-sorted entries.
1081 std::vector
<SBAddFullHash
>::iterator
1082 orig_end
= pending_browse_hashes_
.begin() + orig_size
;
1083 std::sort(orig_end
, pending_browse_hashes_
.end(), SBAddFullHashPrefixLess
);
1084 std::inplace_merge(pending_browse_hashes_
.begin(),
1085 orig_end
, pending_browse_hashes_
.end(),
1086 SBAddFullHashPrefixLess
);
1089 bool SafeBrowsingDatabaseNew::UpdateStarted(
1090 std::vector
<SBListChunkRanges
>* lists
) {
1091 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1094 // If |BeginUpdate()| fails, reset the database.
1095 if (!browse_store_
->BeginUpdate()) {
1096 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN
);
1097 HandleCorruptDatabase();
1101 if (download_store_
.get() && !download_store_
->BeginUpdate()) {
1102 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN
);
1103 HandleCorruptDatabase();
1107 if (csd_whitelist_store_
.get() && !csd_whitelist_store_
->BeginUpdate()) {
1108 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN
);
1109 HandleCorruptDatabase();
1113 if (download_whitelist_store_
.get() &&
1114 !download_whitelist_store_
->BeginUpdate()) {
1115 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN
);
1116 HandleCorruptDatabase();
1120 if (extension_blacklist_store_
&&
1121 !extension_blacklist_store_
->BeginUpdate()) {
1122 RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN
);
1123 HandleCorruptDatabase();
1127 if (side_effect_free_whitelist_store_
&&
1128 !side_effect_free_whitelist_store_
->BeginUpdate()) {
1129 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN
);
1130 HandleCorruptDatabase();
1134 if (ip_blacklist_store_
&& !ip_blacklist_store_
->BeginUpdate()) {
1135 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN
);
1136 HandleCorruptDatabase();
1140 UpdateChunkRangesForLists(browse_store_
.get(),
1141 safe_browsing_util::kMalwareList
,
1142 safe_browsing_util::kPhishingList
,
1145 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1146 // deprecated. Code to delete the list from the store shows ~15k hits/day as
1147 // of Feb 2014, so it has been removed. Everything _should_ be resilient to
1148 // extra data of that sort.
1149 UpdateChunkRangesForList(download_store_
.get(),
1150 safe_browsing_util::kBinUrlList
, lists
);
1152 UpdateChunkRangesForList(csd_whitelist_store_
.get(),
1153 safe_browsing_util::kCsdWhiteList
, lists
);
1155 UpdateChunkRangesForList(download_whitelist_store_
.get(),
1156 safe_browsing_util::kDownloadWhiteList
, lists
);
1158 UpdateChunkRangesForList(extension_blacklist_store_
.get(),
1159 safe_browsing_util::kExtensionBlacklist
, lists
);
1161 UpdateChunkRangesForList(side_effect_free_whitelist_store_
.get(),
1162 safe_browsing_util::kSideEffectFreeWhitelist
, lists
);
1164 UpdateChunkRangesForList(ip_blacklist_store_
.get(),
1165 safe_browsing_util::kIPBlacklist
, lists
);
1167 corruption_detected_
= false;
1168 change_detected_
= false;
1172 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded
) {
1173 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1175 // The update may have failed due to corrupt storage (for instance,
1176 // an excessive number of invalid add_chunks and sub_chunks).
1177 // Double-check that the databases are valid.
1178 // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1179 // sections would allow throwing a corruption error in
1181 if (!update_succeeded
) {
1182 if (!browse_store_
->CheckValidity())
1183 DLOG(ERROR
) << "Safe-browsing browse database corrupt.";
1185 if (download_store_
.get() && !download_store_
->CheckValidity())
1186 DLOG(ERROR
) << "Safe-browsing download database corrupt.";
1188 if (csd_whitelist_store_
.get() && !csd_whitelist_store_
->CheckValidity())
1189 DLOG(ERROR
) << "Safe-browsing csd whitelist database corrupt.";
1191 if (download_whitelist_store_
.get() &&
1192 !download_whitelist_store_
->CheckValidity()) {
1193 DLOG(ERROR
) << "Safe-browsing download whitelist database corrupt.";
1196 if (extension_blacklist_store_
&&
1197 !extension_blacklist_store_
->CheckValidity()) {
1198 DLOG(ERROR
) << "Safe-browsing extension blacklist database corrupt.";
1201 if (side_effect_free_whitelist_store_
&&
1202 !side_effect_free_whitelist_store_
->CheckValidity()) {
1203 DLOG(ERROR
) << "Safe-browsing side-effect free whitelist database "
1207 if (ip_blacklist_store_
&& !ip_blacklist_store_
->CheckValidity()) {
1208 DLOG(ERROR
) << "Safe-browsing IP blacklist database corrupt.";
1212 if (corruption_detected_
)
1215 // Unroll the transaction if there was a protocol error or if the
1216 // transaction was empty. This will leave the prefix set, the
1217 // pending hashes, and the prefix miss cache in place.
1218 if (!update_succeeded
|| !change_detected_
) {
1219 // Track empty updates to answer questions at http://crbug.com/72216 .
1220 if (update_succeeded
&& !change_detected_
)
1221 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1222 browse_store_
->CancelUpdate();
1223 if (download_store_
.get())
1224 download_store_
->CancelUpdate();
1225 if (csd_whitelist_store_
.get())
1226 csd_whitelist_store_
->CancelUpdate();
1227 if (download_whitelist_store_
.get())
1228 download_whitelist_store_
->CancelUpdate();
1229 if (extension_blacklist_store_
)
1230 extension_blacklist_store_
->CancelUpdate();
1231 if (side_effect_free_whitelist_store_
)
1232 side_effect_free_whitelist_store_
->CancelUpdate();
1233 if (ip_blacklist_store_
)
1234 ip_blacklist_store_
->CancelUpdate();
1238 if (download_store_
) {
1239 int64 size_bytes
= UpdateHashPrefixStore(
1241 download_store_
.get(),
1242 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH
);
1243 UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1244 static_cast<int>(size_bytes
/ 1024));
1247 UpdateBrowseStore();
1248 UpdateWhitelistStore(csd_whitelist_filename_
,
1249 csd_whitelist_store_
.get(),
1251 UpdateWhitelistStore(download_whitelist_filename_
,
1252 download_whitelist_store_
.get(),
1253 &download_whitelist_
);
1255 if (extension_blacklist_store_
) {
1256 int64 size_bytes
= UpdateHashPrefixStore(
1257 extension_blacklist_filename_
,
1258 extension_blacklist_store_
.get(),
1259 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH
);
1260 UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1261 static_cast<int>(size_bytes
/ 1024));
1264 if (side_effect_free_whitelist_store_
)
1265 UpdateSideEffectFreeWhitelistStore();
1267 if (ip_blacklist_store_
)
1268 UpdateIpBlacklistStore();
1271 void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1272 const base::FilePath
& store_filename
,
1273 SafeBrowsingStore
* store
,
1274 SBWhitelist
* whitelist
) {
1278 // For the whitelists, we don't cache and save full hashes since all
1279 // hashes are already full.
1280 std::vector
<SBAddFullHash
> empty_add_hashes
;
1282 // Note: |builder| will not be empty. The current data store implementation
1283 // stores all full-length hashes as both full and prefix hashes.
1284 safe_browsing::PrefixSetBuilder builder
;
1285 std::vector
<SBAddFullHash
> full_hashes
;
1286 if (!store
->FinishUpdate(empty_add_hashes
, &builder
, &full_hashes
)) {
1287 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH
);
1288 WhitelistEverything(whitelist
);
1292 #if defined(OS_MACOSX)
1293 base::mac::SetFileBackupExclusion(store_filename
);
1296 LoadWhitelist(full_hashes
, whitelist
);
1299 int64
SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1300 const base::FilePath
& store_filename
,
1301 SafeBrowsingStore
* store
,
1302 FailureType failure_type
) {
1303 // We don't cache and save full hashes.
1304 std::vector
<SBAddFullHash
> empty_add_hashes
;
1306 // These results are not used after this call. Simply ignore the
1307 // returned value after FinishUpdate(...).
1308 safe_browsing::PrefixSetBuilder builder
;
1309 std::vector
<SBAddFullHash
> add_full_hashes_result
;
1311 if (!store
->FinishUpdate(empty_add_hashes
,
1313 &add_full_hashes_result
)) {
1314 RecordFailure(failure_type
);
1317 #if defined(OS_MACOSX)
1318 base::mac::SetFileBackupExclusion(store_filename
);
1321 return GetFileSizeOrZero(store_filename
);
1324 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1325 // Copy out the pending add hashes. Copy rather than swapping in
1326 // case |ContainsBrowseURL()| is called before the new filter is complete.
1327 std::vector
<SBAddFullHash
> pending_add_hashes
;
1329 base::AutoLock
locked(lookup_lock_
);
1330 pending_add_hashes
.insert(pending_add_hashes
.end(),
1331 pending_browse_hashes_
.begin(),
1332 pending_browse_hashes_
.end());
1335 // Measure the amount of IO during the filter build.
1336 base::IoCounters io_before
, io_after
;
1337 base::ProcessHandle handle
= base::Process::Current().handle();
1338 scoped_ptr
<base::ProcessMetrics
> metric(
1339 #if !defined(OS_MACOSX)
1340 base::ProcessMetrics::CreateProcessMetrics(handle
)
1342 // Getting stats only for the current process is enough, so NULL is fine.
1343 base::ProcessMetrics::CreateProcessMetrics(handle
, NULL
)
1347 // IoCounters are currently not supported on Mac, and may not be
1348 // available for Linux, so we check the result and only show IO
1349 // stats if they are available.
1350 const bool got_counters
= metric
->GetIOCounters(&io_before
);
1352 const base::TimeTicks before
= base::TimeTicks::Now();
1354 safe_browsing::PrefixSetBuilder builder
;
1355 std::vector
<SBAddFullHash
> add_full_hashes
;
1356 if (!browse_store_
->FinishUpdate(pending_add_hashes
,
1357 &builder
, &add_full_hashes
)) {
1358 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH
);
1361 scoped_ptr
<safe_browsing::PrefixSet
> prefix_set(builder
.GetPrefixSet());
1363 // This needs to be in sorted order by prefix for efficient access.
1364 std::sort(add_full_hashes
.begin(), add_full_hashes
.end(),
1365 SBAddFullHashPrefixLess
);
1367 // Swap in the newly built filter and cache.
1369 base::AutoLock
locked(lookup_lock_
);
1370 full_browse_hashes_
.swap(add_full_hashes
);
1372 // TODO(shess): If |CacheHashResults()| is posted between the
1373 // earlier lock and this clear, those pending hashes will be lost.
1374 // It could be fixed by only removing hashes which were collected
1375 // at the earlier point. I believe that is fail-safe as-is (the
1376 // hash will be fetched again).
1377 pending_browse_hashes_
.clear();
1378 prefix_miss_cache_
.clear();
1379 browse_prefix_set_
.swap(prefix_set
);
1382 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
1383 << (base::TimeTicks::Now() - before
).InMilliseconds()
1385 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before
);
1387 // Persist the prefix set to disk. Since only this thread changes
1388 // |browse_prefix_set_|, there is no need to lock.
1391 // Gather statistics.
1392 if (got_counters
&& metric
->GetIOCounters(&io_after
)) {
1393 UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1394 static_cast<int>(io_after
.ReadTransferCount
-
1395 io_before
.ReadTransferCount
) / 1024);
1396 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1397 static_cast<int>(io_after
.WriteTransferCount
-
1398 io_before
.WriteTransferCount
) / 1024);
1399 UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1400 static_cast<int>(io_after
.ReadOperationCount
-
1401 io_before
.ReadOperationCount
));
1402 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1403 static_cast<int>(io_after
.WriteOperationCount
-
1404 io_before
.WriteOperationCount
));
1407 int64 file_size
= GetFileSizeOrZero(browse_prefix_set_filename_
);
1408 UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1409 static_cast<int>(file_size
/ 1024));
1410 file_size
= GetFileSizeOrZero(browse_filename_
);
1411 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1412 static_cast<int>(file_size
/ 1024));
1414 #if defined(OS_MACOSX)
1415 base::mac::SetFileBackupExclusion(browse_filename_
);
1419 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1420 std::vector
<SBAddFullHash
> empty_add_hashes
;
1421 safe_browsing::PrefixSetBuilder builder
;
1422 std::vector
<SBAddFullHash
> add_full_hashes_result
;
1424 if (!side_effect_free_whitelist_store_
->FinishUpdate(
1427 &add_full_hashes_result
)) {
1428 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH
);
1431 scoped_ptr
<safe_browsing::PrefixSet
> prefix_set(builder
.GetPrefixSet());
1433 // Swap in the newly built prefix set.
1435 base::AutoLock
locked(lookup_lock_
);
1436 side_effect_free_whitelist_prefix_set_
.swap(prefix_set
);
1439 const base::TimeTicks before
= base::TimeTicks::Now();
1440 const bool write_ok
= side_effect_free_whitelist_prefix_set_
->WriteFile(
1441 side_effect_free_whitelist_prefix_set_filename_
);
1442 DVLOG(1) << "SafeBrowsingDatabaseNew wrote side-effect free whitelist prefix "
1443 << "set in " << (base::TimeTicks::Now() - before
).InMilliseconds()
1445 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1446 base::TimeTicks::Now() - before
);
1449 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE
);
1451 // Gather statistics.
1452 int64 file_size
= GetFileSizeOrZero(
1453 side_effect_free_whitelist_prefix_set_filename_
);
1454 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1455 static_cast<int>(file_size
/ 1024));
1456 file_size
= GetFileSizeOrZero(side_effect_free_whitelist_filename_
);
1457 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1458 static_cast<int>(file_size
/ 1024));
1460 #if defined(OS_MACOSX)
1461 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_
);
1462 base::mac::SetFileBackupExclusion(
1463 side_effect_free_whitelist_prefix_set_filename_
);
1467 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1468 // For the IP blacklist, we don't cache and save full hashes since all
1469 // hashes are already full.
1470 std::vector
<SBAddFullHash
> empty_add_hashes
;
1472 // Note: prefixes will not be empty. The current data store implementation
1473 // stores all full-length hashes as both full and prefix hashes.
1474 safe_browsing::PrefixSetBuilder builder
;
1475 std::vector
<SBAddFullHash
> full_hashes
;
1476 if (!ip_blacklist_store_
->FinishUpdate(empty_add_hashes
,
1477 &builder
, &full_hashes
)) {
1478 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH
);
1479 LoadIpBlacklist(std::vector
<SBAddFullHash
>()); // Clear the list.
1483 #if defined(OS_MACOSX)
1484 base::mac::SetFileBackupExclusion(ip_blacklist_filename_
);
1487 LoadIpBlacklist(full_hashes
);
1490 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1491 // Reset the database after the current task has unwound (but only
1492 // reset once within the scope of a given task).
1493 if (!reset_factory_
.HasWeakPtrs()) {
1494 RecordFailure(FAILURE_DATABASE_CORRUPT
);
1495 base::MessageLoop::current()->PostTask(FROM_HERE
,
1496 base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase
,
1497 reset_factory_
.GetWeakPtr()));
1501 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1502 RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER
);
1503 corruption_detected_
= true; // Stop updating the database.
1505 DLOG(FATAL
) << "SafeBrowsing database was corrupt and reset";
1508 // TODO(shess): I'm not clear why this code doesn't have any
1509 // real error-handling.
1510 void SafeBrowsingDatabaseNew::LoadPrefixSet() {
1511 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1512 DCHECK(!browse_prefix_set_filename_
.empty());
1514 // If there is no database, the filter cannot be used.
1515 base::File::Info db_info
;
1516 if (!base::GetFileInfo(browse_filename_
, &db_info
) || db_info
.size
== 0)
1519 // Cleanup any stale bloom filter (no longer used).
1520 // TODO(shess): Track failure to delete?
1521 base::FilePath bloom_filter_filename
=
1522 BloomFilterForFilename(browse_filename_
);
1523 base::DeleteFile(bloom_filter_filename
, false);
1525 const base::TimeTicks before
= base::TimeTicks::Now();
1526 browse_prefix_set_
= safe_browsing::PrefixSet::LoadFile(
1527 browse_prefix_set_filename_
);
1528 DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in "
1529 << (base::TimeTicks::Now() - before
).InMilliseconds() << " ms";
1530 UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before
);
1532 if (!browse_prefix_set_
.get())
1533 RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ
);
1536 bool SafeBrowsingDatabaseNew::Delete() {
1537 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1539 const bool r1
= browse_store_
->Delete();
1541 RecordFailure(FAILURE_DATABASE_STORE_DELETE
);
1543 const bool r2
= download_store_
.get() ? download_store_
->Delete() : true;
1545 RecordFailure(FAILURE_DATABASE_STORE_DELETE
);
1547 const bool r3
= csd_whitelist_store_
.get() ?
1548 csd_whitelist_store_
->Delete() : true;
1550 RecordFailure(FAILURE_DATABASE_STORE_DELETE
);
1552 const bool r4
= download_whitelist_store_
.get() ?
1553 download_whitelist_store_
->Delete() : true;
1555 RecordFailure(FAILURE_DATABASE_STORE_DELETE
);
1557 base::FilePath bloom_filter_filename
=
1558 BloomFilterForFilename(browse_filename_
);
1559 const bool r5
= base::DeleteFile(bloom_filter_filename
, false);
1561 RecordFailure(FAILURE_DATABASE_FILTER_DELETE
);
1563 const bool r6
= base::DeleteFile(browse_prefix_set_filename_
, false);
1565 RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE
);
1567 const bool r7
= base::DeleteFile(extension_blacklist_filename_
, false);
1569 RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE
);
1571 const bool r8
= base::DeleteFile(side_effect_free_whitelist_filename_
,
1574 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE
);
1576 const bool r9
= base::DeleteFile(
1577 side_effect_free_whitelist_prefix_set_filename_
,
1580 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE
);
1582 const bool r10
= base::DeleteFile(ip_blacklist_filename_
, false);
1584 RecordFailure(FAILURE_IP_BLACKLIST_DELETE
);
1586 return r1
&& r2
&& r3
&& r4
&& r5
&& r6
&& r7
&& r8
&& r9
&& r10
;
1589 void SafeBrowsingDatabaseNew::WritePrefixSet() {
1590 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1592 if (!browse_prefix_set_
.get())
1595 const base::TimeTicks before
= base::TimeTicks::Now();
1596 const bool write_ok
= browse_prefix_set_
->WriteFile(
1597 browse_prefix_set_filename_
);
1598 DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in "
1599 << (base::TimeTicks::Now() - before
).InMilliseconds() << " ms";
1600 UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before
);
1603 RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE
);
1605 #if defined(OS_MACOSX)
1606 base::mac::SetFileBackupExclusion(browse_prefix_set_filename_
);
1610 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist
* whitelist
) {
1611 base::AutoLock
locked(lookup_lock_
);
1612 whitelist
->second
= true;
1613 whitelist
->first
.clear();
1616 void SafeBrowsingDatabaseNew::LoadWhitelist(
1617 const std::vector
<SBAddFullHash
>& full_hashes
,
1618 SBWhitelist
* whitelist
) {
1619 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1620 if (full_hashes
.size() > kMaxWhitelistSize
) {
1621 WhitelistEverything(whitelist
);
1625 std::vector
<SBFullHash
> new_whitelist
;
1626 new_whitelist
.reserve(full_hashes
.size());
1627 for (std::vector
<SBAddFullHash
>::const_iterator it
= full_hashes
.begin();
1628 it
!= full_hashes
.end(); ++it
) {
1629 new_whitelist
.push_back(it
->full_hash
);
1631 std::sort(new_whitelist
.begin(), new_whitelist
.end(), SBFullHashLess
);
1633 SBFullHash kill_switch
= SBFullHashForString(kWhitelistKillSwitchUrl
);
1634 if (std::binary_search(new_whitelist
.begin(), new_whitelist
.end(),
1635 kill_switch
, SBFullHashLess
)) {
1636 // The kill switch is whitelisted hence we whitelist all URLs.
1637 WhitelistEverything(whitelist
);
1639 base::AutoLock
locked(lookup_lock_
);
1640 whitelist
->second
= false;
1641 whitelist
->first
.swap(new_whitelist
);
1645 void SafeBrowsingDatabaseNew::LoadIpBlacklist(
1646 const std::vector
<SBAddFullHash
>& full_hashes
) {
1647 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1648 IPBlacklist new_blacklist
;
1649 DVLOG(2) << "Writing IP blacklist of size: " << full_hashes
.size();
1650 for (std::vector
<SBAddFullHash
>::const_iterator it
= full_hashes
.begin();
1651 it
!= full_hashes
.end();
1653 const char* full_hash
= it
->full_hash
.full_hash
;
1654 DCHECK_EQ(crypto::kSHA256Length
, arraysize(it
->full_hash
.full_hash
));
1655 // The format of the IP blacklist is:
1656 // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes.
1657 std::string
hashed_ip_prefix(full_hash
, base::kSHA1Length
);
1658 size_t prefix_size
= static_cast<uint8
>(full_hash
[base::kSHA1Length
]);
1659 if (prefix_size
> kMaxIpPrefixSize
|| prefix_size
< kMinIpPrefixSize
) {
1660 DVLOG(2) << "Invalid IP prefix size in IP blacklist: " << prefix_size
;
1661 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID
);
1662 new_blacklist
.clear(); // Load empty blacklist.
1666 // We precompute the mask for the given subnet size to speed up lookups.
1667 // Basically we need to create a 16B long string which has the highest
1668 // |size| bits sets to one.
1669 std::string
mask(net::kIPv6AddressSize
, '\0');
1670 mask
.replace(0, prefix_size
/ 8, prefix_size
/ 8, '\xFF');
1671 if ((prefix_size
% 8) != 0) {
1672 mask
[prefix_size
/ 8] = 0xFF << (8 - (prefix_size
% 8));
1674 DVLOG(2) << "Inserting malicious IP: "
1675 << " raw:" << base::HexEncode(full_hash
, crypto::kSHA256Length
)
1676 << " mask:" << base::HexEncode(mask
.data(), mask
.size())
1677 << " prefix_size:" << prefix_size
1678 << " hashed_ip:" << base::HexEncode(hashed_ip_prefix
.data(),
1679 hashed_ip_prefix
.size());
1680 new_blacklist
[mask
].insert(hashed_ip_prefix
);
1683 base::AutoLock
locked(lookup_lock_
);
1684 ip_blacklist_
.swap(new_blacklist
);
1687 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1688 SBFullHash malware_kill_switch
= SBFullHashForString(kMalwareIPKillSwitchUrl
);
1689 std::vector
<SBFullHash
> full_hashes
;
1690 full_hashes
.push_back(malware_kill_switch
);
1691 return ContainsWhitelistedHashes(csd_whitelist_
, full_hashes
);