1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
10 #include "base/bind.h"
11 #include "base/file_util.h"
12 #include "base/message_loop/message_loop.h"
13 #include "base/metrics/histogram.h"
14 #include "base/metrics/stats_counters.h"
15 #include "base/process/process.h"
16 #include "base/process/process_metrics.h"
17 #include "base/sha1.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/strings/stringprintf.h"
20 #include "base/time/time.h"
21 #include "chrome/browser/safe_browsing/prefix_set.h"
22 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
23 #include "content/public/browser/browser_thread.h"
24 #include "crypto/sha2.h"
25 #include "net/base/net_util.h"
28 #if defined(OS_MACOSX)
29 #include "base/mac/mac_util.h"
32 using content::BrowserThread
;
36 // Filename suffix for the bloom filter.
37 const base::FilePath::CharType kBloomFilterFile
[] =
38 FILE_PATH_LITERAL(" Filter 2");
39 // Filename suffix for the prefix set.
40 const base::FilePath::CharType kPrefixSetFile
[] =
41 FILE_PATH_LITERAL(" Prefix Set");
42 // Filename suffix for download store.
43 const base::FilePath::CharType kDownloadDBFile
[] =
44 FILE_PATH_LITERAL(" Download");
45 // Filename suffix for client-side phishing detection whitelist store.
46 const base::FilePath::CharType kCsdWhitelistDBFile
[] =
47 FILE_PATH_LITERAL(" Csd Whitelist");
48 // Filename suffix for the download whitelist store.
49 const base::FilePath::CharType kDownloadWhitelistDBFile
[] =
50 FILE_PATH_LITERAL(" Download Whitelist");
51 // Filename suffix for the extension blacklist store.
52 const base::FilePath::CharType kExtensionBlacklistDBFile
[] =
53 FILE_PATH_LITERAL(" Extension Blacklist");
54 // Filename suffix for the side-effect free whitelist store.
55 const base::FilePath::CharType kSideEffectFreeWhitelistDBFile
[] =
56 FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
57 // Filename suffix for the csd malware IP blacklist store.
58 const base::FilePath::CharType kIPBlacklistDBFile
[] =
59 FILE_PATH_LITERAL(" IP Blacklist");
61 // Filename suffix for browse store.
62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
63 // Unfortunately, to change the name implies lots of transition code
64 // for little benefit. If/when file formats change (say to put all
65 // the data in one file), that would be a convenient point to rectify
67 const base::FilePath::CharType kBrowseDBFile
[] = FILE_PATH_LITERAL(" Bloom");
// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries than this, then all
// lookups to the whitelist will be considered a match.
const size_t kMaxWhitelistSize = 5000;

// If the hash of this exact expression is on a whitelist then all
// lookups to this whitelist will be considered a match.
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!

// If the hash of this exact expression is on a whitelist then the
// malware IP blacklisting feature will be disabled in csd.
const char kMalwareIPKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";

// Largest and smallest IP prefix sizes accepted.
// NOTE(review): presumably expressed in bits of an IPv6 address -- confirm
// against the IP blacklist loading code.
const size_t kMaxIpPrefixSize = 128;
const size_t kMinIpPrefixSize = 1;
88 // To save space, the incoming |chunk_id| and |list_id| are combined
89 // into an |encoded_chunk_id| for storage by shifting the |list_id|
90 // into the low-order bits. These functions decode that information.
91 // TODO(lzheng): It was reasonable when database is saved in sqlite, but
92 // there should be better ways to save chunk_id and list_id after we use
93 // SafeBrowsingStoreFile.
// Returns the list-id bit stored in the low-order bit of
// |encoded_chunk_id|.  The result is always 0 or 1.
int GetListIdBit(const int encoded_chunk_id) {
  return encoded_chunk_id & 1;
}
// Returns the chunk id stored in |encoded_chunk_id| by shifting out the
// low-order list-id bit.
int DecodeChunkId(int encoded_chunk_id) {
  return encoded_chunk_id >> 1;
}
100 int EncodeChunkId(const int chunk
, const int list_id
) {
101 DCHECK_NE(list_id
, safe_browsing_util::INVALID
);
102 return chunk
<< 1 | list_id
% 2;
105 // Generate the set of full hashes to check for |url|. If
106 // |include_whitelist_hashes| is true we will generate additional path-prefixes
107 // to match against the csd whitelist. E.g., if the path-prefix /foo is on the
108 // whitelist it should also match /foo/bar which is not the case for all the
109 // other lists. We'll also always add a pattern for the empty path.
110 // TODO(shess): This function is almost the same as
111 // |CompareFullHashes()| in safe_browsing_util.cc, except that code
112 // does an early exit on match. Since match should be the infrequent
113 // case (phishing or malware found), consider combining this function
// with it.
//
// Hashes are appended to |full_hashes|; the vector is not cleared first.
// One hash is produced per host/path pairing by applying
// SBFullHashForString() to the concatenation of the host and the path.
115 void BrowseFullHashesToCheck(const GURL
& url
,
116 bool include_whitelist_hashes
,
117 std::vector
<SBFullHash
>* full_hashes
) {
118 std::vector
<std::string
> hosts
;
// An IP-address host is used verbatim.
// NOTE(review): the lines separating this branch from the
// GenerateHostsToCheck() call below are not visible in this excerpt;
// presumably that call is the else branch -- confirm.
119 if (url
.HostIsIPAddress()) {
120 hosts
.push_back(url
.host());
122 safe_browsing_util::GenerateHostsToCheck(url
, &hosts
);
125 std::vector
<std::string
> paths
;
126 safe_browsing_util::GeneratePathsToCheck(url
, &paths
);
// Cross product: every host is combined with every path.
128 for (size_t i
= 0; i
< hosts
.size(); ++i
) {
129 for (size_t j
= 0; j
< paths
.size(); ++j
) {
130 const std::string
& path
= paths
[j
];
131 full_hashes
->push_back(SBFullHashForString(hosts
[i
] + path
));
133 // We may have /foo as path-prefix in the whitelist which should
134 // also match with /foo/bar and /foo?bar. Hence, for every path
135 // that ends in '/' we also add the path without the slash.
// NOTE(review): path[path.size() - 1] requires |path| to be non-empty, and a
// condition between the two lines below appears to be missing from this
// excerpt -- confirm the full condition before relying on it.
136 if (include_whitelist_hashes
&&
138 path
[path
.size() - 1] == '/') {
139 full_hashes
->push_back(
140 SBFullHashForString(hosts
[i
] + path
.substr(0, path
.size() - 1)));
146 // Get the prefixes matching the download |urls|.
147 void GetDownloadUrlPrefixes(const std::vector
<GURL
>& urls
,
148 std::vector
<SBPrefix
>* prefixes
) {
149 std::vector
<SBFullHash
> full_hashes
;
150 for (size_t i
= 0; i
< urls
.size(); ++i
)
151 BrowseFullHashesToCheck(urls
[i
], false, &full_hashes
);
153 for (size_t i
= 0; i
< full_hashes
.size(); ++i
)
154 prefixes
->push_back(full_hashes
[i
].prefix
);
157 // Helper function to compare addprefixes in |store| with |prefixes|.
158 // The |list_bit| indicates which list (url or hash) to compare.
160 // Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
161 // the actual matching prefixes.
162 bool MatchAddPrefixes(SafeBrowsingStore
* store
,
164 const std::vector
<SBPrefix
>& prefixes
,
165 std::vector
<SBPrefix
>* prefix_hits
) {
166 prefix_hits
->clear();
167 bool found_match
= false;
169 SBAddPrefixes add_prefixes
;
170 store
->GetAddPrefixes(&add_prefixes
);
171 for (SBAddPrefixes::const_iterator iter
= add_prefixes
.begin();
172 iter
!= add_prefixes
.end(); ++iter
) {
173 for (size_t j
= 0; j
< prefixes
.size(); ++j
) {
174 const SBPrefix
& prefix
= prefixes
[j
];
175 if (prefix
== iter
->prefix
&&
176 GetListIdBit(iter
->chunk_id
) == list_bit
) {
177 prefix_hits
->push_back(prefix
);
185 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and
186 // add them to |full_hits| if not expired. "Not expired" is when
187 // either |last_update| was recent enough, or the item has been
188 // received recently enough. Expired items are not deleted because a
189 // future update may make them acceptable again.
191 // For efficiency reasons the code walks |prefix_hits| and
192 // |full_hashes| in parallel, so they must be sorted by prefix.
193 void GetCachedFullHashesForBrowse(
194 const std::vector
<SBPrefix
>& prefix_hits
,
195 const std::vector
<SBFullHashCached
>& full_hashes
,
196 std::vector
<SBFullHashResult
>* full_hits
) {
197 const base::Time now
= base::Time::Now();
199 std::vector
<SBPrefix
>::const_iterator piter
= prefix_hits
.begin();
200 std::vector
<SBFullHashCached
>::const_iterator hiter
= full_hashes
.begin();
202 while (piter
!= prefix_hits
.end() && hiter
!= full_hashes
.end()) {
203 if (*piter
< hiter
->hash
.prefix
) {
205 } else if (hiter
->hash
.prefix
< *piter
) {
208 if (now
<= hiter
->expire_after
) {
209 SBFullHashResult result
;
210 result
.list_id
= hiter
->list_id
;
211 result
.hash
= hiter
->hash
;
212 full_hits
->push_back(result
);
215 // Only increment |hiter|, |piter| might have multiple hits.
221 // This function generates a chunk range string for |chunks|. It
222 // outputs one chunk range string per list and writes it to the
223 // |list_ranges| vector. We expect |list_ranges| to already be of the
224 // right size. E.g., if |chunks| contains chunks with two different
225 // list ids then |list_ranges| must contain two elements.
226 void GetChunkRanges(const std::vector
<int>& chunks
,
227 std::vector
<std::string
>* list_ranges
) {
228 // Since there are 2 possible list ids, there must be exactly two
229 // list ranges. Even if the chunk data should only contain one
230 // line, this code has to somehow handle corruption.
231 DCHECK_EQ(2U, list_ranges
->size());
233 std::vector
<std::vector
<int> > decoded_chunks(list_ranges
->size());
234 for (std::vector
<int>::const_iterator iter
= chunks
.begin();
235 iter
!= chunks
.end(); ++iter
) {
236 int mod_list_id
= GetListIdBit(*iter
);
237 DCHECK_GE(mod_list_id
, 0);
238 DCHECK_LT(static_cast<size_t>(mod_list_id
), decoded_chunks
.size());
239 decoded_chunks
[mod_list_id
].push_back(DecodeChunkId(*iter
));
241 for (size_t i
= 0; i
< decoded_chunks
.size(); ++i
) {
242 ChunksToRangeString(decoded_chunks
[i
], &((*list_ranges
)[i
]));
246 // Helper function to create chunk range lists for Browse related
248 void UpdateChunkRanges(SafeBrowsingStore
* store
,
249 const std::vector
<std::string
>& listnames
,
250 std::vector
<SBListChunkRanges
>* lists
) {
254 DCHECK_GT(listnames
.size(), 0U);
255 DCHECK_LE(listnames
.size(), 2U);
256 std::vector
<int> add_chunks
;
257 std::vector
<int> sub_chunks
;
258 store
->GetAddChunks(&add_chunks
);
259 store
->GetSubChunks(&sub_chunks
);
261 // Always decode 2 ranges, even if only the first one is expected.
262 // The loop below will only load as many into |lists| as |listnames|
264 std::vector
<std::string
> adds(2);
265 std::vector
<std::string
> subs(2);
266 GetChunkRanges(add_chunks
, &adds
);
267 GetChunkRanges(sub_chunks
, &subs
);
269 for (size_t i
= 0; i
< listnames
.size(); ++i
) {
270 const std::string
& listname
= listnames
[i
];
271 DCHECK_EQ(safe_browsing_util::GetListId(listname
) % 2,
272 static_cast<int>(i
% 2));
273 DCHECK_NE(safe_browsing_util::GetListId(listname
),
274 safe_browsing_util::INVALID
);
275 lists
->push_back(SBListChunkRanges(listname
));
276 lists
->back().adds
.swap(adds
[i
]);
277 lists
->back().subs
.swap(subs
[i
]);
281 void UpdateChunkRangesForLists(SafeBrowsingStore
* store
,
282 const std::string
& listname0
,
283 const std::string
& listname1
,
284 std::vector
<SBListChunkRanges
>* lists
) {
285 std::vector
<std::string
> listnames
;
286 listnames
.push_back(listname0
);
287 listnames
.push_back(listname1
);
288 UpdateChunkRanges(store
, listnames
, lists
);
291 void UpdateChunkRangesForList(SafeBrowsingStore
* store
,
292 const std::string
& listname
,
293 std::vector
<SBListChunkRanges
>* lists
) {
294 UpdateChunkRanges(store
, std::vector
<std::string
>(1, listname
), lists
);
297 // Order |SBFullHashCached| items on the prefix part.
298 bool SBFullHashCachedPrefixLess(const SBFullHashCached
& a
,
299 const SBFullHashCached
& b
) {
300 return a
.hash
.prefix
< b
.hash
.prefix
;
303 // This code always checks for non-zero file size. This helper makes
304 // that less verbose.
305 int64
GetFileSizeOrZero(const base::FilePath
& file_path
) {
307 if (!base::GetFileSize(file_path
, &size_64
))
314 // The default SafeBrowsingDatabaseFactory.
315 class SafeBrowsingDatabaseFactoryImpl
: public SafeBrowsingDatabaseFactory
{
317 virtual SafeBrowsingDatabase
* CreateSafeBrowsingDatabase(
318 bool enable_download_protection
,
319 bool enable_client_side_whitelist
,
320 bool enable_download_whitelist
,
321 bool enable_extension_blacklist
,
322 bool enable_side_effect_free_whitelist
,
323 bool enable_ip_blacklist
) OVERRIDE
{
324 return new SafeBrowsingDatabaseNew(
325 new SafeBrowsingStoreFile
,
326 enable_download_protection
? new SafeBrowsingStoreFile
: NULL
,
327 enable_client_side_whitelist
? new SafeBrowsingStoreFile
: NULL
,
328 enable_download_whitelist
? new SafeBrowsingStoreFile
: NULL
,
329 enable_extension_blacklist
? new SafeBrowsingStoreFile
: NULL
,
330 enable_side_effect_free_whitelist
? new SafeBrowsingStoreFile
: NULL
,
331 enable_ip_blacklist
? new SafeBrowsingStoreFile
: NULL
);
334 SafeBrowsingDatabaseFactoryImpl() { }
337 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl
);
341 SafeBrowsingDatabaseFactory
* SafeBrowsingDatabase::factory_
= NULL
;
343 // Factory method, non-thread safe. Caller has to make sure this s called
344 // on SafeBrowsing Thread.
345 // TODO(shess): There's no need for a factory any longer. Convert
346 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
347 // callers just construct things directly.
348 SafeBrowsingDatabase
* SafeBrowsingDatabase::Create(
349 bool enable_download_protection
,
350 bool enable_client_side_whitelist
,
351 bool enable_download_whitelist
,
352 bool enable_extension_blacklist
,
353 bool enable_side_effect_free_whitelist
,
354 bool enable_ip_blacklist
) {
356 factory_
= new SafeBrowsingDatabaseFactoryImpl();
357 return factory_
->CreateSafeBrowsingDatabase(
358 enable_download_protection
,
359 enable_client_side_whitelist
,
360 enable_download_whitelist
,
361 enable_extension_blacklist
,
362 enable_side_effect_free_whitelist
,
363 enable_ip_blacklist
);
366 SafeBrowsingDatabase::~SafeBrowsingDatabase() {
370 base::FilePath
SafeBrowsingDatabase::BrowseDBFilename(
371 const base::FilePath
& db_base_filename
) {
372 return base::FilePath(db_base_filename
.value() + kBrowseDBFile
);
376 base::FilePath
SafeBrowsingDatabase::DownloadDBFilename(
377 const base::FilePath
& db_base_filename
) {
378 return base::FilePath(db_base_filename
.value() + kDownloadDBFile
);
382 base::FilePath
SafeBrowsingDatabase::BloomFilterForFilename(
383 const base::FilePath
& db_filename
) {
384 return base::FilePath(db_filename
.value() + kBloomFilterFile
);
388 base::FilePath
SafeBrowsingDatabase::PrefixSetForFilename(
389 const base::FilePath
& db_filename
) {
390 return base::FilePath(db_filename
.value() + kPrefixSetFile
);
394 base::FilePath
SafeBrowsingDatabase::CsdWhitelistDBFilename(
395 const base::FilePath
& db_filename
) {
396 return base::FilePath(db_filename
.value() + kCsdWhitelistDBFile
);
400 base::FilePath
SafeBrowsingDatabase::DownloadWhitelistDBFilename(
401 const base::FilePath
& db_filename
) {
402 return base::FilePath(db_filename
.value() + kDownloadWhitelistDBFile
);
406 base::FilePath
SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
407 const base::FilePath
& db_filename
) {
408 return base::FilePath(db_filename
.value() + kExtensionBlacklistDBFile
);
412 base::FilePath
SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
413 const base::FilePath
& db_filename
) {
414 return base::FilePath(db_filename
.value() + kSideEffectFreeWhitelistDBFile
);
418 base::FilePath
SafeBrowsingDatabase::IpBlacklistDBFilename(
419 const base::FilePath
& db_filename
) {
420 return base::FilePath(db_filename
.value() + kIPBlacklistDBFile
);
423 SafeBrowsingStore
* SafeBrowsingDatabaseNew::GetStore(const int list_id
) {
424 if (list_id
== safe_browsing_util::PHISH
||
425 list_id
== safe_browsing_util::MALWARE
) {
426 return browse_store_
.get();
427 } else if (list_id
== safe_browsing_util::BINURL
) {
428 return download_store_
.get();
429 } else if (list_id
== safe_browsing_util::CSDWHITELIST
) {
430 return csd_whitelist_store_
.get();
431 } else if (list_id
== safe_browsing_util::DOWNLOADWHITELIST
) {
432 return download_whitelist_store_
.get();
433 } else if (list_id
== safe_browsing_util::EXTENSIONBLACKLIST
) {
434 return extension_blacklist_store_
.get();
435 } else if (list_id
== safe_browsing_util::SIDEEFFECTFREEWHITELIST
) {
436 return side_effect_free_whitelist_store_
.get();
437 } else if (list_id
== safe_browsing_util::IPBLACKLIST
) {
438 return ip_blacklist_store_
.get();
444 void SafeBrowsingDatabase::RecordFailure(FailureType failure_type
) {
445 UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type
,
446 FAILURE_DATABASE_MAX
);
449 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
450 : creation_loop_(base::MessageLoop::current()),
451 browse_store_(new SafeBrowsingStoreFile
),
452 reset_factory_(this),
453 corruption_detected_(false),
454 change_detected_(false) {
455 DCHECK(browse_store_
.get());
456 DCHECK(!download_store_
.get());
457 DCHECK(!csd_whitelist_store_
.get());
458 DCHECK(!download_whitelist_store_
.get());
459 DCHECK(!extension_blacklist_store_
.get());
460 DCHECK(!side_effect_free_whitelist_store_
.get());
461 DCHECK(!ip_blacklist_store_
.get());
464 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
465 SafeBrowsingStore
* browse_store
,
466 SafeBrowsingStore
* download_store
,
467 SafeBrowsingStore
* csd_whitelist_store
,
468 SafeBrowsingStore
* download_whitelist_store
,
469 SafeBrowsingStore
* extension_blacklist_store
,
470 SafeBrowsingStore
* side_effect_free_whitelist_store
,
471 SafeBrowsingStore
* ip_blacklist_store
)
472 : creation_loop_(base::MessageLoop::current()),
473 browse_store_(browse_store
),
474 download_store_(download_store
),
475 csd_whitelist_store_(csd_whitelist_store
),
476 download_whitelist_store_(download_whitelist_store
),
477 extension_blacklist_store_(extension_blacklist_store
),
478 side_effect_free_whitelist_store_(side_effect_free_whitelist_store
),
479 ip_blacklist_store_(ip_blacklist_store
),
480 reset_factory_(this),
481 corruption_detected_(false) {
482 DCHECK(browse_store_
.get());
485 SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
486 // The DCHECK is disabled due to crbug.com/338486 .
487 // DCHECK_EQ(creation_loop_, base::MessageLoop::current());
// Derives the on-disk filename of every configured store from
// |filename_base|, initializes each store with a corruption callback bound
// to HandleCorruptDatabase(), and loads the in-memory whitelists, the
// side-effect free whitelist prefix set and the IP blacklist.
// Must run on the message loop the object was created on, and only once.
// NOTE(review): several statements of this function (branch keywords, some
// call heads and arguments, closing braces) are not visible in this
// excerpt; the comments below describe only what is shown.
490 void SafeBrowsingDatabaseNew::Init(const base::FilePath
& filename_base
) {
491 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
492 // Ensure we haven't been run before.
493 DCHECK(browse_filename_
.empty());
494 DCHECK(download_filename_
.empty());
495 DCHECK(csd_whitelist_filename_
.empty());
496 DCHECK(download_whitelist_filename_
.empty());
497 DCHECK(extension_blacklist_filename_
.empty());
498 DCHECK(side_effect_free_whitelist_filename_
.empty());
499 DCHECK(ip_blacklist_filename_
.empty());
501 browse_filename_
= BrowseDBFilename(filename_base
);
502 browse_prefix_set_filename_
= PrefixSetForFilename(browse_filename_
);
// NOTE(review): the head of the call that receives this callback is not
// visible here; presumably it is the browse store's Init() -- confirm.
506 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
507 base::Unretained(this)));
508 DVLOG(1) << "Init browse store: " << browse_filename_
.value();
511 // NOTE: There is no need to grab the lock in this function, since
512 // until it returns, there are no pointers to this class on other
513 // threads. Then again, that means there is no possibility of
514 // contention on the lock...
515 base::AutoLock
locked(lookup_lock_
);
516 cached_browse_hashes_
.clear();
// Each optional store below is set up only if it was supplied at
// construction time.
520 if (download_store_
.get()) {
521 download_filename_
= DownloadDBFilename(filename_base
);
522 download_store_
->Init(
524 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
525 base::Unretained(this)));
526 DVLOG(1) << "Init download store: " << download_filename_
.value();
529 if (csd_whitelist_store_
.get()) {
530 csd_whitelist_filename_
= CsdWhitelistDBFilename(filename_base
);
531 csd_whitelist_store_
->Init(
532 csd_whitelist_filename_
,
533 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
534 base::Unretained(this)));
535 DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_
.value();
// Populate the in-memory csd whitelist from the store's add full hashes.
// NOTE(review): WhitelistEverything() appears twice below; presumably one
// call handles a failed read and the other a missing store -- the
// separating else lines are not visible in this excerpt.
536 std::vector
<SBAddFullHash
> full_hashes
;
537 if (csd_whitelist_store_
->GetAddFullHashes(&full_hashes
)) {
538 LoadWhitelist(full_hashes
, &csd_whitelist_
);
540 WhitelistEverything(&csd_whitelist_
);
543 WhitelistEverything(&csd_whitelist_
); // Just to be safe.
546 if (download_whitelist_store_
.get()) {
547 download_whitelist_filename_
= DownloadWhitelistDBFilename(filename_base
);
548 download_whitelist_store_
->Init(
549 download_whitelist_filename_
,
550 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
551 base::Unretained(this)));
552 DVLOG(1) << "Init download whitelist store: "
553 << download_whitelist_filename_
.value();
// Same load pattern as the csd whitelist above.
554 std::vector
<SBAddFullHash
> full_hashes
;
555 if (download_whitelist_store_
->GetAddFullHashes(&full_hashes
)) {
556 LoadWhitelist(full_hashes
, &download_whitelist_
);
558 WhitelistEverything(&download_whitelist_
);
561 WhitelistEverything(&download_whitelist_
); // Just to be safe.
564 if (extension_blacklist_store_
.get()) {
565 extension_blacklist_filename_
= ExtensionBlacklistDBFilename(filename_base
);
566 extension_blacklist_store_
->Init(
567 extension_blacklist_filename_
,
568 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
569 base::Unretained(this)));
570 DVLOG(1) << "Init extension blacklist store: "
571 << extension_blacklist_filename_
.value();
574 if (side_effect_free_whitelist_store_
.get()) {
575 side_effect_free_whitelist_filename_
=
576 SideEffectFreeWhitelistDBFilename(filename_base
);
577 side_effect_free_whitelist_prefix_set_filename_
=
578 PrefixSetForFilename(side_effect_free_whitelist_filename_
);
579 side_effect_free_whitelist_store_
->Init(
580 side_effect_free_whitelist_filename_
,
581 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
582 base::Unretained(this)));
583 DVLOG(1) << "Init side-effect free whitelist store: "
584 << side_effect_free_whitelist_filename_
.value();
586 // If there is no database, the filter cannot be used.
587 base::File::Info db_info
;
588 if (base::GetFileInfo(side_effect_free_whitelist_filename_
, &db_info
)
589 && db_info
.size
!= 0) {
// Time the prefix set load so it can be logged and reported to UMA.
590 const base::TimeTicks before
= base::TimeTicks::Now();
591 side_effect_free_whitelist_prefix_set_
=
592 safe_browsing::PrefixSet::LoadFile(
593 side_effect_free_whitelist_prefix_set_filename_
);
594 DVLOG(1) << "SafeBrowsingDatabaseNew read side-effect free whitelist "
596 << (base::TimeTicks::Now() - before
).InMilliseconds() << " ms";
597 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
598 base::TimeTicks::Now() - before
);
// A prefix set that failed to load is recorded as a failure.
599 if (!side_effect_free_whitelist_prefix_set_
.get())
600 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ
);
603 // Delete any files of the side-effect free whitelist that may be around
604 // from when it was previously enabled.
605 SafeBrowsingStoreFile::DeleteStore(
606 SideEffectFreeWhitelistDBFilename(filename_base
));
609 if (ip_blacklist_store_
.get()) {
610 ip_blacklist_filename_
= IpBlacklistDBFilename(filename_base
);
611 ip_blacklist_store_
->Init(
612 ip_blacklist_filename_
,
613 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
614 base::Unretained(this)));
615 DVLOG(1) << "SafeBrowsingDatabaseNew read ip blacklist: "
616 << ip_blacklist_filename_
.value();
// Load the IP blacklist from the store's add full hashes; an empty vector
// is loaded instead when the hashes cannot be read.
617 std::vector
<SBAddFullHash
> full_hashes
;
618 if (ip_blacklist_store_
->GetAddFullHashes(&full_hashes
)) {
619 LoadIpBlacklist(full_hashes
);
621 DVLOG(1) << "Unable to load full hashes from the IP blacklist.";
622 LoadIpBlacklist(std::vector
<SBAddFullHash
>()); // Clear the list.
// Resets the database: clears the cached browse hashes, the prefix miss
// cache, both prefix sets and the IP blacklist under |lookup_lock_|, then
// switches both whitelists to match-everything mode.
// Must run on the message loop the object was created on.
// NOTE(review): the statement that deletes the files on disk and the
// return statements are not visible in this excerpt, so the meaning of the
// bool result cannot be confirmed from here.
627 bool SafeBrowsingDatabaseNew::ResetDatabase() {
628 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
630 // Delete files on disk.
631 // TODO(shess): Hard to see where one might want to delete without a
632 // reset. Perhaps inline |Delete()|?
636 // Reset objects in memory.
638 base::AutoLock
locked(lookup_lock_
);
639 cached_browse_hashes_
.clear();
640 prefix_miss_cache_
.clear();
641 browse_prefix_set_
.reset();
642 side_effect_free_whitelist_prefix_set_
.reset();
643 ip_blacklist_
.clear();
// NOTE(review): per the comment below, WhitelistEverything() takes
// |lookup_lock_| itself, so |locked| above presumably lives in an inner
// scope that ends before these calls -- the braces are not visible here.
645 // Wants to acquire the lock itself.
646 WhitelistEverything(&csd_whitelist_
);
647 WhitelistEverything(&download_whitelist_
);
651 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
653 std::vector
<SBPrefix
>* prefix_hits
,
654 std::vector
<SBFullHashResult
>* cache_hits
) {
655 // Clear the results first.
656 prefix_hits
->clear();
659 std::vector
<SBFullHash
> full_hashes
;
660 BrowseFullHashesToCheck(url
, false, &full_hashes
);
661 if (full_hashes
.empty())
664 // This function is called on the I/O thread, prevent changes to
665 // filter and caches.
666 base::AutoLock
locked(lookup_lock_
);
668 // |browse_prefix_set_| is empty until it is either read from disk, or the
669 // first update populates it. Bail out without a hit if not yet
671 if (!browse_prefix_set_
.get())
674 size_t miss_count
= 0;
675 for (size_t i
= 0; i
< full_hashes
.size(); ++i
) {
676 if (browse_prefix_set_
->Exists(full_hashes
[i
])) {
677 const SBPrefix prefix
= full_hashes
[i
].prefix
;
678 prefix_hits
->push_back(prefix
);
679 if (prefix_miss_cache_
.count(prefix
) > 0)
684 // If all the prefixes are cached as 'misses', don't issue a GetHash.
685 if (miss_count
== prefix_hits
->size())
688 // Find matching cached gethash responses.
689 std::sort(prefix_hits
->begin(), prefix_hits
->end());
690 GetCachedFullHashesForBrowse(*prefix_hits
, cached_browse_hashes_
, cache_hits
);
695 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
696 const std::vector
<GURL
>& urls
,
697 std::vector
<SBPrefix
>* prefix_hits
) {
698 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
700 // Ignore this check when download checking is not enabled.
701 if (!download_store_
.get())
704 std::vector
<SBPrefix
> prefixes
;
705 GetDownloadUrlPrefixes(urls
, &prefixes
);
706 return MatchAddPrefixes(download_store_
.get(),
707 safe_browsing_util::BINURL
% 2,
712 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL
& url
) {
713 // This method is theoretically thread-safe but we expect all calls to
714 // originate from the IO thread.
715 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
716 std::vector
<SBFullHash
> full_hashes
;
717 BrowseFullHashesToCheck(url
, true, &full_hashes
);
718 return ContainsWhitelistedHashes(csd_whitelist_
, full_hashes
);
721 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL
& url
) {
722 std::vector
<SBFullHash
> full_hashes
;
723 BrowseFullHashesToCheck(url
, true, &full_hashes
);
724 return ContainsWhitelistedHashes(download_whitelist_
, full_hashes
);
727 bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
728 const std::vector
<SBPrefix
>& prefixes
,
729 std::vector
<SBPrefix
>* prefix_hits
) {
730 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
731 if (!extension_blacklist_store_
)
734 return MatchAddPrefixes(extension_blacklist_store_
.get(),
735 safe_browsing_util::EXTENSIONBLACKLIST
% 2,
// Canonicalizes the URL, hashes the resulting host + path (+ "?" + query)
// string and looks that hash up in the side-effect free whitelist prefix
// set while holding |lookup_lock_|.
// NOTE(review): the parameter list, the declarations of |host|, |path| and
// |query|, and the early return taken when the prefix set is unavailable
// are not visible in this excerpt.
740 bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
745 safe_browsing_util::CanonicalizeUrl(url
, &host
, &path
, &query
);
746 std::string url_to_check
= host
+ path
;
// NOTE(review): a line between the statement above and the one below is
// missing from this excerpt; presumably the query is appended only when it
// is non-empty -- confirm.
748 url_to_check
+= "?" + query
;
749 SBFullHash full_hash
= SBFullHashForString(url_to_check
);
751 // This function can be called on any thread, so lock against any changes
752 base::AutoLock
locked(lookup_lock_
);
754 // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
755 // from disk, or the first update populates it. Bail out without a hit if
756 // not yet available.
757 if (!side_effect_free_whitelist_prefix_set_
.get())
760 return side_effect_free_whitelist_prefix_set_
->Exists(full_hash
);
763 bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string
& ip_address
) {
764 net::IPAddressNumber ip_number
;
765 if (!net::ParseIPLiteralToNumber(ip_address
, &ip_number
)) {
766 DVLOG(2) << "Unable to parse IP address: '" << ip_address
<< "'";
769 if (ip_number
.size() == net::kIPv4AddressSize
) {
770 ip_number
= net::ConvertIPv4NumberToIPv6Number(ip_number
);
772 if (ip_number
.size() != net::kIPv6AddressSize
) {
773 DVLOG(2) << "Unable to convert IPv4 address to IPv6: '"
774 << ip_address
<< "'";
775 return false; // better safe than sorry.
777 // This function can be called from any thread.
778 base::AutoLock
locked(lookup_lock_
);
779 for (IPBlacklist::const_iterator it
= ip_blacklist_
.begin();
780 it
!= ip_blacklist_
.end();
782 const std::string
& mask
= it
->first
;
783 DCHECK_EQ(mask
.size(), ip_number
.size());
784 std::string
subnet(net::kIPv6AddressSize
, '\0');
785 for (size_t i
= 0; i
< net::kIPv6AddressSize
; ++i
) {
786 subnet
[i
] = ip_number
[i
] & mask
[i
];
788 const std::string hash
= base::SHA1HashString(subnet
);
789 DVLOG(2) << "Lookup Malware IP: "
790 << " ip:" << ip_address
791 << " mask:" << base::HexEncode(mask
.data(), mask
.size())
792 << " subnet:" << base::HexEncode(subnet
.data(), subnet
.size())
793 << " hash:" << base::HexEncode(hash
.data(), hash
.size());
794 if (it
->second
.count(hash
) > 0) {
801 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
802 const std::string
& str
) {
803 std::vector
<SBFullHash
> hashes
;
804 hashes
.push_back(SBFullHashForString(str
));
805 return ContainsWhitelistedHashes(download_whitelist_
, hashes
);
808 bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
809 const SBWhitelist
& whitelist
,
810 const std::vector
<SBFullHash
>& hashes
) {
811 base::AutoLock
l(lookup_lock_
);
812 if (whitelist
.second
)
814 for (std::vector
<SBFullHash
>::const_iterator it
= hashes
.begin();
815 it
!= hashes
.end(); ++it
) {
816 if (std::binary_search(whitelist
.first
.begin(), whitelist
.first
.end(),
817 *it
, SBFullHashLess
)) {
824 // Helper to insert entries for all of the prefixes or full hashes in
825 // |entry| into the store.
826 void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id
, SBPrefix host
,
827 const SBEntry
* entry
, int list_id
) {
828 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
830 SafeBrowsingStore
* store
= GetStore(list_id
);
833 STATS_COUNTER("SB.HostInsert", 1);
834 const int encoded_chunk_id
= EncodeChunkId(chunk_id
, list_id
);
835 const int count
= entry
->prefix_count();
837 DCHECK(!entry
->IsSub());
839 // No prefixes, use host instead.
840 STATS_COUNTER("SB.PrefixAdd", 1);
841 store
->WriteAddPrefix(encoded_chunk_id
, host
);
842 } else if (entry
->IsPrefix()) {
844 for (int i
= 0; i
< count
; i
++) {
845 const SBPrefix prefix
= entry
->PrefixAt(i
);
846 STATS_COUNTER("SB.PrefixAdd", 1);
847 store
->WriteAddPrefix(encoded_chunk_id
, prefix
);
851 for (int i
= 0; i
< count
; ++i
) {
852 const SBFullHash full_hash
= entry
->FullHashAt(i
);
854 STATS_COUNTER("SB.PrefixAddFull", 1);
855 store
->WriteAddHash(encoded_chunk_id
, full_hash
);
860 // Helper to iterate over all the entries in the hosts in |chunks| and
861 // add them to the store.
862 void SafeBrowsingDatabaseNew::InsertAddChunks(
863 const safe_browsing_util::ListType list_id
,
864 const SBChunkList
& chunks
) {
865 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
867 SafeBrowsingStore
* store
= GetStore(list_id
);
870 for (SBChunkList::const_iterator citer
= chunks
.begin();
871 citer
!= chunks
.end(); ++citer
) {
872 const int chunk_id
= citer
->chunk_number
;
874 // The server can give us a chunk that we already have because
875 // it's part of a range. Don't add it again.
876 const int encoded_chunk_id
= EncodeChunkId(chunk_id
, list_id
);
877 if (store
->CheckAddChunk(encoded_chunk_id
))
880 store
->SetAddChunk(encoded_chunk_id
);
881 for (std::deque
<SBChunkHost
>::const_iterator hiter
= citer
->hosts
.begin();
882 hiter
!= citer
->hosts
.end(); ++hiter
) {
883 // NOTE: Could pass |encoded_chunk_id|, but then inserting add
884 // chunks would look different from inserting sub chunks.
885 InsertAdd(chunk_id
, hiter
->host
, hiter
->entry
, list_id
);
890 // Helper to insert entries for all of the prefixes or full hashes in
891 // |entry| into the store.
892 void SafeBrowsingDatabaseNew::InsertSub(int chunk_id
, SBPrefix host
,
893 const SBEntry
* entry
, int list_id
) {
894 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
896 SafeBrowsingStore
* store
= GetStore(list_id
);
899 STATS_COUNTER("SB.HostDelete", 1);
900 const int encoded_chunk_id
= EncodeChunkId(chunk_id
, list_id
);
901 const int count
= entry
->prefix_count();
903 DCHECK(entry
->IsSub());
905 // No prefixes, use host instead.
906 STATS_COUNTER("SB.PrefixSub", 1);
907 const int add_chunk_id
= EncodeChunkId(entry
->chunk_id(), list_id
);
908 store
->WriteSubPrefix(encoded_chunk_id
, add_chunk_id
, host
);
909 } else if (entry
->IsPrefix()) {
911 for (int i
= 0; i
< count
; i
++) {
912 const SBPrefix prefix
= entry
->PrefixAt(i
);
913 const int add_chunk_id
=
914 EncodeChunkId(entry
->ChunkIdAtPrefix(i
), list_id
);
916 STATS_COUNTER("SB.PrefixSub", 1);
917 store
->WriteSubPrefix(encoded_chunk_id
, add_chunk_id
, prefix
);
921 for (int i
= 0; i
< count
; ++i
) {
922 const SBFullHash full_hash
= entry
->FullHashAt(i
);
923 const int add_chunk_id
=
924 EncodeChunkId(entry
->ChunkIdAtPrefix(i
), list_id
);
926 STATS_COUNTER("SB.PrefixSubFull", 1);
927 store
->WriteSubHash(encoded_chunk_id
, add_chunk_id
, full_hash
);
932 // Helper to iterate over all the entries in the hosts in |chunks| and
933 // add them to the store.
934 void SafeBrowsingDatabaseNew::InsertSubChunks(
935 safe_browsing_util::ListType list_id
,
936 const SBChunkList
& chunks
) {
937 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
939 SafeBrowsingStore
* store
= GetStore(list_id
);
942 for (SBChunkList::const_iterator citer
= chunks
.begin();
943 citer
!= chunks
.end(); ++citer
) {
944 const int chunk_id
= citer
->chunk_number
;
946 // The server can give us a chunk that we already have because
947 // it's part of a range. Don't add it again.
948 const int encoded_chunk_id
= EncodeChunkId(chunk_id
, list_id
);
949 if (store
->CheckSubChunk(encoded_chunk_id
))
952 store
->SetSubChunk(encoded_chunk_id
);
953 for (std::deque
<SBChunkHost
>::const_iterator hiter
= citer
->hosts
.begin();
954 hiter
!= citer
->hosts
.end(); ++hiter
) {
955 InsertSub(chunk_id
, hiter
->host
, hiter
->entry
, list_id
);
960 void SafeBrowsingDatabaseNew::InsertChunks(const std::string
& list_name
,
961 const SBChunkList
& chunks
) {
962 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
964 if (corruption_detected_
|| chunks
.empty())
967 const base::TimeTicks before
= base::TimeTicks::Now();
969 const safe_browsing_util::ListType list_id
=
970 safe_browsing_util::GetListId(list_name
);
971 DVLOG(2) << list_name
<< ": " << list_id
;
973 SafeBrowsingStore
* store
= GetStore(list_id
);
976 change_detected_
= true;
979 if (chunks
.front().is_add
) {
980 InsertAddChunks(list_id
, chunks
);
982 InsertSubChunks(list_id
, chunks
);
984 store
->FinishChunk();
986 UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before
);
989 void SafeBrowsingDatabaseNew::DeleteChunks(
990 const std::vector
<SBChunkDelete
>& chunk_deletes
) {
991 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
993 if (corruption_detected_
|| chunk_deletes
.empty())
996 const std::string
& list_name
= chunk_deletes
.front().list_name
;
997 const safe_browsing_util::ListType list_id
=
998 safe_browsing_util::GetListId(list_name
);
1000 SafeBrowsingStore
* store
= GetStore(list_id
);
1003 change_detected_
= true;
1005 for (size_t i
= 0; i
< chunk_deletes
.size(); ++i
) {
1006 std::vector
<int> chunk_numbers
;
1007 RangesToChunks(chunk_deletes
[i
].chunk_del
, &chunk_numbers
);
1008 for (size_t j
= 0; j
< chunk_numbers
.size(); ++j
) {
1009 const int encoded_chunk_id
= EncodeChunkId(chunk_numbers
[j
], list_id
);
1010 if (chunk_deletes
[i
].is_sub_del
)
1011 store
->DeleteSubChunk(encoded_chunk_id
);
1013 store
->DeleteAddChunk(encoded_chunk_id
);
1018 void SafeBrowsingDatabaseNew::CacheHashResults(
1019 const std::vector
<SBPrefix
>& prefixes
,
1020 const std::vector
<SBFullHashResult
>& full_hits
,
1021 const base::TimeDelta
& cache_lifetime
) {
1022 const base::Time expire_after
= base::Time::Now() + cache_lifetime
;
1024 // This is called on the I/O thread, lock against updates.
1025 base::AutoLock
locked(lookup_lock_
);
1027 if (full_hits
.empty()) {
1028 prefix_miss_cache_
.insert(prefixes
.begin(), prefixes
.end());
1032 const size_t orig_size
= cached_browse_hashes_
.size();
1033 for (std::vector
<SBFullHashResult
>::const_iterator iter
= full_hits
.begin();
1034 iter
!= full_hits
.end(); ++iter
) {
1035 if (iter
->list_id
== safe_browsing_util::MALWARE
||
1036 iter
->list_id
== safe_browsing_util::PHISH
) {
1037 SBFullHashCached cached_hash
;
1038 cached_hash
.hash
= iter
->hash
;
1039 cached_hash
.list_id
= iter
->list_id
;
1040 cached_hash
.expire_after
= expire_after
;
1041 cached_browse_hashes_
.push_back(cached_hash
);
1045 // Sort new entries then merge with the previously-sorted entries.
1046 std::vector
<SBFullHashCached
>::iterator
1047 orig_end
= cached_browse_hashes_
.begin() + orig_size
;
1048 std::sort(orig_end
, cached_browse_hashes_
.end(), SBFullHashCachedPrefixLess
);
1049 std::inplace_merge(cached_browse_hashes_
.begin(),
1050 orig_end
, cached_browse_hashes_
.end(),
1051 SBFullHashCachedPrefixLess
);
1054 bool SafeBrowsingDatabaseNew::UpdateStarted(
1055 std::vector
<SBListChunkRanges
>* lists
) {
1056 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1059 // If |BeginUpdate()| fails, reset the database.
1060 if (!browse_store_
->BeginUpdate()) {
1061 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN
);
1062 HandleCorruptDatabase();
1066 if (download_store_
.get() && !download_store_
->BeginUpdate()) {
1067 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN
);
1068 HandleCorruptDatabase();
1072 if (csd_whitelist_store_
.get() && !csd_whitelist_store_
->BeginUpdate()) {
1073 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN
);
1074 HandleCorruptDatabase();
1078 if (download_whitelist_store_
.get() &&
1079 !download_whitelist_store_
->BeginUpdate()) {
1080 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN
);
1081 HandleCorruptDatabase();
1085 if (extension_blacklist_store_
&&
1086 !extension_blacklist_store_
->BeginUpdate()) {
1087 RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN
);
1088 HandleCorruptDatabase();
1092 if (side_effect_free_whitelist_store_
&&
1093 !side_effect_free_whitelist_store_
->BeginUpdate()) {
1094 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN
);
1095 HandleCorruptDatabase();
1099 if (ip_blacklist_store_
&& !ip_blacklist_store_
->BeginUpdate()) {
1100 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN
);
1101 HandleCorruptDatabase();
1105 UpdateChunkRangesForLists(browse_store_
.get(),
1106 safe_browsing_util::kMalwareList
,
1107 safe_browsing_util::kPhishingList
,
1110 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1111 // deprecated. Code to delete the list from the store shows ~15k hits/day as
1112 // of Feb 2014, so it has been removed. Everything _should_ be resilient to
1113 // extra data of that sort.
1114 UpdateChunkRangesForList(download_store_
.get(),
1115 safe_browsing_util::kBinUrlList
, lists
);
1117 UpdateChunkRangesForList(csd_whitelist_store_
.get(),
1118 safe_browsing_util::kCsdWhiteList
, lists
);
1120 UpdateChunkRangesForList(download_whitelist_store_
.get(),
1121 safe_browsing_util::kDownloadWhiteList
, lists
);
1123 UpdateChunkRangesForList(extension_blacklist_store_
.get(),
1124 safe_browsing_util::kExtensionBlacklist
, lists
);
1126 UpdateChunkRangesForList(side_effect_free_whitelist_store_
.get(),
1127 safe_browsing_util::kSideEffectFreeWhitelist
, lists
);
1129 UpdateChunkRangesForList(ip_blacklist_store_
.get(),
1130 safe_browsing_util::kIPBlacklist
, lists
);
1132 corruption_detected_
= false;
1133 change_detected_
= false;
1137 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded
) {
1138 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1140 // The update may have failed due to corrupt storage (for instance,
1141 // an excessive number of invalid add_chunks and sub_chunks).
1142 // Double-check that the databases are valid.
1143 // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1144 // sections would allow throwing a corruption error in
1146 if (!update_succeeded
) {
1147 if (!browse_store_
->CheckValidity())
1148 DLOG(ERROR
) << "Safe-browsing browse database corrupt.";
1150 if (download_store_
.get() && !download_store_
->CheckValidity())
1151 DLOG(ERROR
) << "Safe-browsing download database corrupt.";
1153 if (csd_whitelist_store_
.get() && !csd_whitelist_store_
->CheckValidity())
1154 DLOG(ERROR
) << "Safe-browsing csd whitelist database corrupt.";
1156 if (download_whitelist_store_
.get() &&
1157 !download_whitelist_store_
->CheckValidity()) {
1158 DLOG(ERROR
) << "Safe-browsing download whitelist database corrupt.";
1161 if (extension_blacklist_store_
&&
1162 !extension_blacklist_store_
->CheckValidity()) {
1163 DLOG(ERROR
) << "Safe-browsing extension blacklist database corrupt.";
1166 if (side_effect_free_whitelist_store_
&&
1167 !side_effect_free_whitelist_store_
->CheckValidity()) {
1168 DLOG(ERROR
) << "Safe-browsing side-effect free whitelist database "
1172 if (ip_blacklist_store_
&& !ip_blacklist_store_
->CheckValidity()) {
1173 DLOG(ERROR
) << "Safe-browsing IP blacklist database corrupt.";
1177 if (corruption_detected_
)
1180 // Unroll the transaction if there was a protocol error or if the
1181 // transaction was empty. This will leave the prefix set, the
1182 // pending hashes, and the prefix miss cache in place.
1183 if (!update_succeeded
|| !change_detected_
) {
1184 // Track empty updates to answer questions at http://crbug.com/72216 .
1185 if (update_succeeded
&& !change_detected_
)
1186 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1187 browse_store_
->CancelUpdate();
1188 if (download_store_
.get())
1189 download_store_
->CancelUpdate();
1190 if (csd_whitelist_store_
.get())
1191 csd_whitelist_store_
->CancelUpdate();
1192 if (download_whitelist_store_
.get())
1193 download_whitelist_store_
->CancelUpdate();
1194 if (extension_blacklist_store_
)
1195 extension_blacklist_store_
->CancelUpdate();
1196 if (side_effect_free_whitelist_store_
)
1197 side_effect_free_whitelist_store_
->CancelUpdate();
1198 if (ip_blacklist_store_
)
1199 ip_blacklist_store_
->CancelUpdate();
1203 if (download_store_
) {
1204 int64 size_bytes
= UpdateHashPrefixStore(
1206 download_store_
.get(),
1207 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH
);
1208 UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1209 static_cast<int>(size_bytes
/ 1024));
1212 UpdateBrowseStore();
1213 UpdateWhitelistStore(csd_whitelist_filename_
,
1214 csd_whitelist_store_
.get(),
1216 UpdateWhitelistStore(download_whitelist_filename_
,
1217 download_whitelist_store_
.get(),
1218 &download_whitelist_
);
1220 if (extension_blacklist_store_
) {
1221 int64 size_bytes
= UpdateHashPrefixStore(
1222 extension_blacklist_filename_
,
1223 extension_blacklist_store_
.get(),
1224 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH
);
1225 UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1226 static_cast<int>(size_bytes
/ 1024));
1229 if (side_effect_free_whitelist_store_
)
1230 UpdateSideEffectFreeWhitelistStore();
1232 if (ip_blacklist_store_
)
1233 UpdateIpBlacklistStore();
1236 void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1237 const base::FilePath
& store_filename
,
1238 SafeBrowsingStore
* store
,
1239 SBWhitelist
* whitelist
) {
1243 // Note: |builder| will not be empty. The current data store implementation
1244 // stores all full-length hashes as both full and prefix hashes.
1245 safe_browsing::PrefixSetBuilder builder
;
1246 std::vector
<SBAddFullHash
> full_hashes
;
1247 if (!store
->FinishUpdate(&builder
, &full_hashes
)) {
1248 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH
);
1249 WhitelistEverything(whitelist
);
1253 #if defined(OS_MACOSX)
1254 base::mac::SetFileBackupExclusion(store_filename
);
1257 LoadWhitelist(full_hashes
, whitelist
);
1260 int64
SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1261 const base::FilePath
& store_filename
,
1262 SafeBrowsingStore
* store
,
1263 FailureType failure_type
) {
1264 // These results are not used after this call. Simply ignore the
1265 // returned value after FinishUpdate(...).
1266 safe_browsing::PrefixSetBuilder builder
;
1267 std::vector
<SBAddFullHash
> add_full_hashes_result
;
1269 if (!store
->FinishUpdate(&builder
, &add_full_hashes_result
))
1270 RecordFailure(failure_type
);
1272 #if defined(OS_MACOSX)
1273 base::mac::SetFileBackupExclusion(store_filename
);
1276 return GetFileSizeOrZero(store_filename
);
1279 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1280 // Measure the amount of IO during the filter build.
1281 base::IoCounters io_before
, io_after
;
1282 base::ProcessHandle handle
= base::Process::Current().handle();
1283 scoped_ptr
<base::ProcessMetrics
> metric(
1284 #if !defined(OS_MACOSX)
1285 base::ProcessMetrics::CreateProcessMetrics(handle
)
1287 // Getting stats only for the current process is enough, so NULL is fine.
1288 base::ProcessMetrics::CreateProcessMetrics(handle
, NULL
)
1292 // IoCounters are currently not supported on Mac, and may not be
1293 // available for Linux, so we check the result and only show IO
1294 // stats if they are available.
1295 const bool got_counters
= metric
->GetIOCounters(&io_before
);
1297 const base::TimeTicks before
= base::TimeTicks::Now();
1299 // TODO(shess): Perhaps refactor to let builder accumulate full hashes on the
1300 // fly? Other clients use the SBAddFullHash vector, but AFAICT they only use
1301 // the SBFullHash portion. It would need an accessor on PrefixSet.
1302 safe_browsing::PrefixSetBuilder builder
;
1303 std::vector
<SBAddFullHash
> add_full_hashes
;
1304 if (!browse_store_
->FinishUpdate(&builder
, &add_full_hashes
)) {
1305 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH
);
1309 std::vector
<SBFullHash
> full_hash_results
;
1310 for (size_t i
= 0; i
< add_full_hashes
.size(); ++i
) {
1311 full_hash_results
.push_back(add_full_hashes
[i
].full_hash
);
1314 scoped_ptr
<safe_browsing::PrefixSet
>
1315 prefix_set(builder
.GetPrefixSet(full_hash_results
));
1317 // Swap in the newly built filter and cache.
1319 base::AutoLock
locked(lookup_lock_
);
1321 // TODO(shess): If |CacheHashResults()| is posted between the
1322 // earlier lock and this clear, those pending hashes will be lost.
1323 // It could be fixed by only removing hashes which were collected
1324 // at the earlier point. I believe that is fail-safe as-is (the
1325 // hash will be fetched again).
1326 cached_browse_hashes_
.clear();
1327 prefix_miss_cache_
.clear();
1328 browse_prefix_set_
.swap(prefix_set
);
1331 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
1332 << (base::TimeTicks::Now() - before
).InMilliseconds()
1334 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before
);
1336 // Persist the prefix set to disk. Since only this thread changes
1337 // |browse_prefix_set_|, there is no need to lock.
1340 // Gather statistics.
1341 if (got_counters
&& metric
->GetIOCounters(&io_after
)) {
1342 UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1343 static_cast<int>(io_after
.ReadTransferCount
-
1344 io_before
.ReadTransferCount
) / 1024);
1345 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1346 static_cast<int>(io_after
.WriteTransferCount
-
1347 io_before
.WriteTransferCount
) / 1024);
1348 UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1349 static_cast<int>(io_after
.ReadOperationCount
-
1350 io_before
.ReadOperationCount
));
1351 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1352 static_cast<int>(io_after
.WriteOperationCount
-
1353 io_before
.WriteOperationCount
));
1356 int64 file_size
= GetFileSizeOrZero(browse_prefix_set_filename_
);
1357 UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1358 static_cast<int>(file_size
/ 1024));
1359 file_size
= GetFileSizeOrZero(browse_filename_
);
1360 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1361 static_cast<int>(file_size
/ 1024));
1363 #if defined(OS_MACOSX)
1364 base::mac::SetFileBackupExclusion(browse_filename_
);
1368 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1369 safe_browsing::PrefixSetBuilder builder
;
1370 std::vector
<SBAddFullHash
> add_full_hashes_result
;
1372 if (!side_effect_free_whitelist_store_
->FinishUpdate(
1373 &builder
, &add_full_hashes_result
)) {
1374 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH
);
1377 scoped_ptr
<safe_browsing::PrefixSet
>
1378 prefix_set(builder
.GetPrefixSetNoHashes());
1380 // Swap in the newly built prefix set.
1382 base::AutoLock
locked(lookup_lock_
);
1383 side_effect_free_whitelist_prefix_set_
.swap(prefix_set
);
1386 const base::TimeTicks before
= base::TimeTicks::Now();
1387 const bool write_ok
= side_effect_free_whitelist_prefix_set_
->WriteFile(
1388 side_effect_free_whitelist_prefix_set_filename_
);
1389 DVLOG(1) << "SafeBrowsingDatabaseNew wrote side-effect free whitelist prefix "
1390 << "set in " << (base::TimeTicks::Now() - before
).InMilliseconds()
1392 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1393 base::TimeTicks::Now() - before
);
1396 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE
);
1398 // Gather statistics.
1399 int64 file_size
= GetFileSizeOrZero(
1400 side_effect_free_whitelist_prefix_set_filename_
);
1401 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1402 static_cast<int>(file_size
/ 1024));
1403 file_size
= GetFileSizeOrZero(side_effect_free_whitelist_filename_
);
1404 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1405 static_cast<int>(file_size
/ 1024));
1407 #if defined(OS_MACOSX)
1408 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_
);
1409 base::mac::SetFileBackupExclusion(
1410 side_effect_free_whitelist_prefix_set_filename_
);
1414 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1415 // Note: prefixes will not be empty. The current data store implementation
1416 // stores all full-length hashes as both full and prefix hashes.
1417 safe_browsing::PrefixSetBuilder builder
;
1418 std::vector
<SBAddFullHash
> full_hashes
;
1419 if (!ip_blacklist_store_
->FinishUpdate(&builder
, &full_hashes
)) {
1420 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH
);
1421 LoadIpBlacklist(std::vector
<SBAddFullHash
>()); // Clear the list.
1425 #if defined(OS_MACOSX)
1426 base::mac::SetFileBackupExclusion(ip_blacklist_filename_
);
1429 LoadIpBlacklist(full_hashes
);
1432 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1433 // Reset the database after the current task has unwound (but only
1434 // reset once within the scope of a given task).
1435 if (!reset_factory_
.HasWeakPtrs()) {
1436 RecordFailure(FAILURE_DATABASE_CORRUPT
);
1437 base::MessageLoop::current()->PostTask(FROM_HERE
,
1438 base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase
,
1439 reset_factory_
.GetWeakPtr()));
1443 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1444 RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER
);
1445 corruption_detected_
= true; // Stop updating the database.
1447 DLOG(FATAL
) << "SafeBrowsing database was corrupt and reset";
1450 // TODO(shess): I'm not clear why this code doesn't have any
1451 // real error-handling.
1452 void SafeBrowsingDatabaseNew::LoadPrefixSet() {
1453 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1454 DCHECK(!browse_prefix_set_filename_
.empty());
1456 // If there is no database, the filter cannot be used.
1457 base::File::Info db_info
;
1458 if (!base::GetFileInfo(browse_filename_
, &db_info
) || db_info
.size
== 0)
1461 // Cleanup any stale bloom filter (no longer used).
1462 // TODO(shess): Track failure to delete?
1463 base::FilePath bloom_filter_filename
=
1464 BloomFilterForFilename(browse_filename_
);
1465 base::DeleteFile(bloom_filter_filename
, false);
1467 const base::TimeTicks before
= base::TimeTicks::Now();
1468 browse_prefix_set_
= safe_browsing::PrefixSet::LoadFile(
1469 browse_prefix_set_filename_
);
1470 DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in "
1471 << (base::TimeTicks::Now() - before
).InMilliseconds() << " ms";
1472 UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before
);
1474 if (!browse_prefix_set_
.get())
1475 RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ
);
1478 bool SafeBrowsingDatabaseNew::Delete() {
1479 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1481 const bool r1
= browse_store_
->Delete();
1483 RecordFailure(FAILURE_DATABASE_STORE_DELETE
);
1485 const bool r2
= download_store_
.get() ? download_store_
->Delete() : true;
1487 RecordFailure(FAILURE_DATABASE_STORE_DELETE
);
1489 const bool r3
= csd_whitelist_store_
.get() ?
1490 csd_whitelist_store_
->Delete() : true;
1492 RecordFailure(FAILURE_DATABASE_STORE_DELETE
);
1494 const bool r4
= download_whitelist_store_
.get() ?
1495 download_whitelist_store_
->Delete() : true;
1497 RecordFailure(FAILURE_DATABASE_STORE_DELETE
);
1499 base::FilePath bloom_filter_filename
=
1500 BloomFilterForFilename(browse_filename_
);
1501 const bool r5
= base::DeleteFile(bloom_filter_filename
, false);
1503 RecordFailure(FAILURE_DATABASE_FILTER_DELETE
);
1505 const bool r6
= base::DeleteFile(browse_prefix_set_filename_
, false);
1507 RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE
);
1509 const bool r7
= base::DeleteFile(extension_blacklist_filename_
, false);
1511 RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE
);
1513 const bool r8
= base::DeleteFile(side_effect_free_whitelist_filename_
,
1516 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE
);
1518 const bool r9
= base::DeleteFile(
1519 side_effect_free_whitelist_prefix_set_filename_
,
1522 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE
);
1524 const bool r10
= base::DeleteFile(ip_blacklist_filename_
, false);
1526 RecordFailure(FAILURE_IP_BLACKLIST_DELETE
);
1528 return r1
&& r2
&& r3
&& r4
&& r5
&& r6
&& r7
&& r8
&& r9
&& r10
;
1531 void SafeBrowsingDatabaseNew::WritePrefixSet() {
1532 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1534 if (!browse_prefix_set_
.get())
1537 const base::TimeTicks before
= base::TimeTicks::Now();
1538 const bool write_ok
= browse_prefix_set_
->WriteFile(
1539 browse_prefix_set_filename_
);
1540 DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in "
1541 << (base::TimeTicks::Now() - before
).InMilliseconds() << " ms";
1542 UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before
);
1545 RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE
);
1547 #if defined(OS_MACOSX)
1548 base::mac::SetFileBackupExclusion(browse_prefix_set_filename_
);
1552 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist
* whitelist
) {
1553 base::AutoLock
locked(lookup_lock_
);
1554 whitelist
->second
= true;
1555 whitelist
->first
.clear();
1558 void SafeBrowsingDatabaseNew::LoadWhitelist(
1559 const std::vector
<SBAddFullHash
>& full_hashes
,
1560 SBWhitelist
* whitelist
) {
1561 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1562 if (full_hashes
.size() > kMaxWhitelistSize
) {
1563 WhitelistEverything(whitelist
);
1567 std::vector
<SBFullHash
> new_whitelist
;
1568 new_whitelist
.reserve(full_hashes
.size());
1569 for (std::vector
<SBAddFullHash
>::const_iterator it
= full_hashes
.begin();
1570 it
!= full_hashes
.end(); ++it
) {
1571 new_whitelist
.push_back(it
->full_hash
);
1573 std::sort(new_whitelist
.begin(), new_whitelist
.end(), SBFullHashLess
);
1575 SBFullHash kill_switch
= SBFullHashForString(kWhitelistKillSwitchUrl
);
1576 if (std::binary_search(new_whitelist
.begin(), new_whitelist
.end(),
1577 kill_switch
, SBFullHashLess
)) {
1578 // The kill switch is whitelisted hence we whitelist all URLs.
1579 WhitelistEverything(whitelist
);
1581 base::AutoLock
locked(lookup_lock_
);
1582 whitelist
->second
= false;
1583 whitelist
->first
.swap(new_whitelist
);
1587 void SafeBrowsingDatabaseNew::LoadIpBlacklist(
1588 const std::vector
<SBAddFullHash
>& full_hashes
) {
1589 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1590 IPBlacklist new_blacklist
;
1591 DVLOG(2) << "Writing IP blacklist of size: " << full_hashes
.size();
1592 for (std::vector
<SBAddFullHash
>::const_iterator it
= full_hashes
.begin();
1593 it
!= full_hashes
.end();
1595 const char* full_hash
= it
->full_hash
.full_hash
;
1596 DCHECK_EQ(crypto::kSHA256Length
, arraysize(it
->full_hash
.full_hash
));
1597 // The format of the IP blacklist is:
1598 // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes.
1599 std::string
hashed_ip_prefix(full_hash
, base::kSHA1Length
);
1600 size_t prefix_size
= static_cast<uint8
>(full_hash
[base::kSHA1Length
]);
1601 if (prefix_size
> kMaxIpPrefixSize
|| prefix_size
< kMinIpPrefixSize
) {
1602 DVLOG(2) << "Invalid IP prefix size in IP blacklist: " << prefix_size
;
1603 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID
);
1604 new_blacklist
.clear(); // Load empty blacklist.
1608 // We precompute the mask for the given subnet size to speed up lookups.
1609 // Basically we need to create a 16B long string which has the highest
1610 // |size| bits sets to one.
1611 std::string
mask(net::kIPv6AddressSize
, '\0');
1612 mask
.replace(0, prefix_size
/ 8, prefix_size
/ 8, '\xFF');
1613 if ((prefix_size
% 8) != 0) {
1614 mask
[prefix_size
/ 8] = 0xFF << (8 - (prefix_size
% 8));
1616 DVLOG(2) << "Inserting malicious IP: "
1617 << " raw:" << base::HexEncode(full_hash
, crypto::kSHA256Length
)
1618 << " mask:" << base::HexEncode(mask
.data(), mask
.size())
1619 << " prefix_size:" << prefix_size
1620 << " hashed_ip:" << base::HexEncode(hashed_ip_prefix
.data(),
1621 hashed_ip_prefix
.size());
1622 new_blacklist
[mask
].insert(hashed_ip_prefix
);
1625 base::AutoLock
locked(lookup_lock_
);
1626 ip_blacklist_
.swap(new_blacklist
);
1629 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1630 SBFullHash malware_kill_switch
= SBFullHashForString(kMalwareIPKillSwitchUrl
);
1631 std::vector
<SBFullHash
> full_hashes
;
1632 full_hashes
.push_back(malware_kill_switch
);
1633 return ContainsWhitelistedHashes(csd_whitelist_
, full_hashes
);
1636 bool SafeBrowsingDatabaseNew::IsCsdWhitelistKillSwitchOn() {
1637 return csd_whitelist_
.second
;