1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
10 #include "base/bind.h"
11 #include "base/files/file_util.h"
12 #include "base/message_loop/message_loop.h"
13 #include "base/metrics/histogram.h"
14 #include "base/metrics/stats_counters.h"
15 #include "base/process/process_handle.h"
16 #include "base/process/process_metrics.h"
17 #include "base/sha1.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/strings/stringprintf.h"
20 #include "base/time/time.h"
21 #include "chrome/browser/safe_browsing/prefix_set.h"
22 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
23 #include "content/public/browser/browser_thread.h"
24 #include "crypto/sha2.h"
25 #include "net/base/net_util.h"
28 #if defined(OS_MACOSX)
29 #include "base/mac/mac_util.h"
32 using content::BrowserThread
;
36 // Filename suffix for the bloom filter.
37 const base::FilePath::CharType kBloomFilterFile
[] =
38 FILE_PATH_LITERAL(" Filter 2");
39 // Filename suffix for the prefix set.
40 const base::FilePath::CharType kPrefixSetFile
[] =
41 FILE_PATH_LITERAL(" Prefix Set");
42 // Filename suffix for download store.
43 const base::FilePath::CharType kDownloadDBFile
[] =
44 FILE_PATH_LITERAL(" Download");
45 // Filename suffix for client-side phishing detection whitelist store.
46 const base::FilePath::CharType kCsdWhitelistDBFile
[] =
47 FILE_PATH_LITERAL(" Csd Whitelist");
48 // Filename suffix for the download whitelist store.
49 const base::FilePath::CharType kDownloadWhitelistDBFile
[] =
50 FILE_PATH_LITERAL(" Download Whitelist");
51 // Filename suffix for the extension blacklist store.
52 const base::FilePath::CharType kExtensionBlacklistDBFile
[] =
53 FILE_PATH_LITERAL(" Extension Blacklist");
54 // Filename suffix for the side-effect free whitelist store.
55 const base::FilePath::CharType kSideEffectFreeWhitelistDBFile
[] =
56 FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
57 // Filename suffix for the csd malware IP blacklist store.
58 const base::FilePath::CharType kIPBlacklistDBFile
[] =
59 FILE_PATH_LITERAL(" IP Blacklist");
61 // Filename suffix for browse store.
62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
63 // Unfortunately, to change the name implies lots of transition code
64 // for little benefit. If/when file formats change (say to put all
65 // the data in one file), that would be a convenient point to rectify
67 // TODO(shess): This shouldn't be OS-driven <http://crbug.com/394379>
68 #if defined(OS_ANDROID)
69 // NOTE(shess): This difference is also reflected in the list name in
70 // safe_browsing_util.cc.
71 // TODO(shess): Spin up an alternate list id which can be persisted in the
72 // store. Then if a mistake is made, it won't cause confusion between
73 // incompatible lists.
74 const base::FilePath::CharType kBrowseDBFile
[] = FILE_PATH_LITERAL(" Mobile");
76 const base::FilePath::CharType kBrowseDBFile
[] = FILE_PATH_LITERAL(" Bloom");
// Upper bound on the number of entries allowed in any whitelist.  A
// whitelist on disk with more entries than this makes every lookup
// against it count as a match.
const size_t kMaxWhitelistSize = 5000;

// Kill switch: if the hash of this exact expression is on a whitelist,
// every lookup against that whitelist is considered a match.
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!

// Kill switch: if the hash of this exact expression is on a whitelist,
// the malware IP blacklisting feature is disabled in csd.
const char kMalwareIPKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";

// Bounds on the accepted IP prefix size.
// NOTE(review): presumably measured in bits (128 == IPv6 width) -- confirm
// against the code that consumes these.
const size_t kMaxIpPrefixSize = 128;
const size_t kMinIpPrefixSize = 1;
// To save space, the incoming |chunk_id| and |list_id| are combined
// into an |encoded_chunk_id| for storage by shifting the |list_id|
// into the low-order bits.  These functions decode that information.
// TODO(lzheng): It was reasonable when database is saved in sqlite, but
// there should be better ways to save chunk_id and list_id after we use
// SafeBrowsingStoreFile.

// Returns the list-id bit (0 or 1) stored in the lowest bit of
// |encoded_chunk_id|.
int GetListIdBit(const int encoded_chunk_id) {
  const int kListIdMask = 1;
  return encoded_chunk_id & kListIdMask;
}
// Returns the chunk id carried by |encoded_chunk_id|, i.e. the value with
// the low-order list-id bit shifted out.
int DecodeChunkId(int encoded_chunk_id) {
  const int chunk_id = encoded_chunk_id >> 1;
  return chunk_id;
}
110 int EncodeChunkId(const int chunk
, const int list_id
) {
111 DCHECK_NE(list_id
, safe_browsing_util::INVALID
);
112 return chunk
<< 1 | list_id
% 2;
115 // Generate the set of full hashes to check for |url|. If
116 // |include_whitelist_hashes| is true we will generate additional path-prefixes
117 // to match against the csd whitelist. E.g., if the path-prefix /foo is on the
118 // whitelist it should also match /foo/bar which is not the case for all the
119 // other lists. We'll also always add a pattern for the empty path.
120 // TODO(shess): This function is almost the same as
121 // |CompareFullHashes()| in safe_browsing_util.cc, except that code
122 // does an early exit on match. Since match should be the infrequent
123 // case (phishing or malware found), consider combining this function
125 void BrowseFullHashesToCheck(const GURL
& url
,
126 bool include_whitelist_hashes
,
127 std::vector
<SBFullHash
>* full_hashes
) {
128 std::vector
<std::string
> hosts
;
129 if (url
.HostIsIPAddress()) {
130 hosts
.push_back(url
.host());
132 safe_browsing_util::GenerateHostsToCheck(url
, &hosts
);
135 std::vector
<std::string
> paths
;
136 safe_browsing_util::GeneratePathsToCheck(url
, &paths
);
138 for (size_t i
= 0; i
< hosts
.size(); ++i
) {
139 for (size_t j
= 0; j
< paths
.size(); ++j
) {
140 const std::string
& path
= paths
[j
];
141 full_hashes
->push_back(SBFullHashForString(hosts
[i
] + path
));
143 // We may have /foo as path-prefix in the whitelist which should
144 // also match with /foo/bar and /foo?bar. Hence, for every path
145 // that ends in '/' we also add the path without the slash.
146 if (include_whitelist_hashes
&&
148 path
[path
.size() - 1] == '/') {
149 full_hashes
->push_back(
150 SBFullHashForString(hosts
[i
] + path
.substr(0, path
.size() - 1)));
156 // Get the prefixes matching the download |urls|.
157 void GetDownloadUrlPrefixes(const std::vector
<GURL
>& urls
,
158 std::vector
<SBPrefix
>* prefixes
) {
159 std::vector
<SBFullHash
> full_hashes
;
160 for (size_t i
= 0; i
< urls
.size(); ++i
)
161 BrowseFullHashesToCheck(urls
[i
], false, &full_hashes
);
163 for (size_t i
= 0; i
< full_hashes
.size(); ++i
)
164 prefixes
->push_back(full_hashes
[i
].prefix
);
167 // Helper function to compare addprefixes in |store| with |prefixes|.
168 // The |list_bit| indicates which list (url or hash) to compare.
170 // Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
171 // the actual matching prefixes.
172 bool MatchAddPrefixes(SafeBrowsingStore
* store
,
174 const std::vector
<SBPrefix
>& prefixes
,
175 std::vector
<SBPrefix
>* prefix_hits
) {
176 prefix_hits
->clear();
177 bool found_match
= false;
179 SBAddPrefixes add_prefixes
;
180 store
->GetAddPrefixes(&add_prefixes
);
181 for (SBAddPrefixes::const_iterator iter
= add_prefixes
.begin();
182 iter
!= add_prefixes
.end(); ++iter
) {
183 for (size_t j
= 0; j
< prefixes
.size(); ++j
) {
184 const SBPrefix
& prefix
= prefixes
[j
];
185 if (prefix
== iter
->prefix
&&
186 GetListIdBit(iter
->chunk_id
) == list_bit
) {
187 prefix_hits
->push_back(prefix
);
195 // This function generates a chunk range string for |chunks|. It
196 // outputs one chunk range string per list and writes it to the
197 // |list_ranges| vector. We expect |list_ranges| to already be of the
198 // right size. E.g., if |chunks| contains chunks with two different
199 // list ids then |list_ranges| must contain two elements.
200 void GetChunkRanges(const std::vector
<int>& chunks
,
201 std::vector
<std::string
>* list_ranges
) {
202 // Since there are 2 possible list ids, there must be exactly two
203 // list ranges. Even if the chunk data should only contain one
204 // line, this code has to somehow handle corruption.
205 DCHECK_EQ(2U, list_ranges
->size());
207 std::vector
<std::vector
<int> > decoded_chunks(list_ranges
->size());
208 for (std::vector
<int>::const_iterator iter
= chunks
.begin();
209 iter
!= chunks
.end(); ++iter
) {
210 int mod_list_id
= GetListIdBit(*iter
);
211 DCHECK_GE(mod_list_id
, 0);
212 DCHECK_LT(static_cast<size_t>(mod_list_id
), decoded_chunks
.size());
213 decoded_chunks
[mod_list_id
].push_back(DecodeChunkId(*iter
));
215 for (size_t i
= 0; i
< decoded_chunks
.size(); ++i
) {
216 ChunksToRangeString(decoded_chunks
[i
], &((*list_ranges
)[i
]));
220 // Helper function to create chunk range lists for Browse related
222 void UpdateChunkRanges(SafeBrowsingStore
* store
,
223 const std::vector
<std::string
>& listnames
,
224 std::vector
<SBListChunkRanges
>* lists
) {
228 DCHECK_GT(listnames
.size(), 0U);
229 DCHECK_LE(listnames
.size(), 2U);
230 std::vector
<int> add_chunks
;
231 std::vector
<int> sub_chunks
;
232 store
->GetAddChunks(&add_chunks
);
233 store
->GetSubChunks(&sub_chunks
);
235 // Always decode 2 ranges, even if only the first one is expected.
236 // The loop below will only load as many into |lists| as |listnames|
238 std::vector
<std::string
> adds(2);
239 std::vector
<std::string
> subs(2);
240 GetChunkRanges(add_chunks
, &adds
);
241 GetChunkRanges(sub_chunks
, &subs
);
243 for (size_t i
= 0; i
< listnames
.size(); ++i
) {
244 const std::string
& listname
= listnames
[i
];
245 DCHECK_EQ(safe_browsing_util::GetListId(listname
) % 2,
246 static_cast<int>(i
% 2));
247 DCHECK_NE(safe_browsing_util::GetListId(listname
),
248 safe_browsing_util::INVALID
);
249 lists
->push_back(SBListChunkRanges(listname
));
250 lists
->back().adds
.swap(adds
[i
]);
251 lists
->back().subs
.swap(subs
[i
]);
255 void UpdateChunkRangesForLists(SafeBrowsingStore
* store
,
256 const std::string
& listname0
,
257 const std::string
& listname1
,
258 std::vector
<SBListChunkRanges
>* lists
) {
259 std::vector
<std::string
> listnames
;
260 listnames
.push_back(listname0
);
261 listnames
.push_back(listname1
);
262 UpdateChunkRanges(store
, listnames
, lists
);
265 void UpdateChunkRangesForList(SafeBrowsingStore
* store
,
266 const std::string
& listname
,
267 std::vector
<SBListChunkRanges
>* lists
) {
268 UpdateChunkRanges(store
, std::vector
<std::string
>(1, listname
), lists
);
271 // This code always checks for non-zero file size. This helper makes
272 // that less verbose.
273 int64
GetFileSizeOrZero(const base::FilePath
& file_path
) {
275 if (!base::GetFileSize(file_path
, &size_64
))
280 // Helper for ContainsBrowseUrlHashes(). Returns true if an un-expired match
281 // for |full_hash| is found in |cache|, with any matches appended to |results|
282 // (true can be returned with zero matches). |expire_base| is used to check the
283 // cache lifetime of matches, expired matches will be discarded from |cache|.
284 bool GetCachedFullHash(std::map
<SBPrefix
, SBCachedFullHashResult
>* cache
,
285 const SBFullHash
& full_hash
,
286 const base::Time
& expire_base
,
287 std::vector
<SBFullHashResult
>* results
) {
288 // First check if there is a valid cached result for this prefix.
289 std::map
<SBPrefix
, SBCachedFullHashResult
>::iterator
290 citer
= cache
->find(full_hash
.prefix
);
291 if (citer
== cache
->end())
294 // Remove expired entries.
295 SBCachedFullHashResult
& cached_result
= citer
->second
;
296 if (cached_result
.expire_after
<= expire_base
) {
301 // Find full-hash matches.
302 std::vector
<SBFullHashResult
>& cached_hashes
= cached_result
.full_hashes
;
303 for (size_t i
= 0; i
< cached_hashes
.size(); ++i
) {
304 if (SBFullHashEqual(full_hash
, cached_hashes
[i
].hash
))
305 results
->push_back(cached_hashes
[i
]);
313 // The default SafeBrowsingDatabaseFactory.
314 class SafeBrowsingDatabaseFactoryImpl
: public SafeBrowsingDatabaseFactory
{
316 virtual SafeBrowsingDatabase
* CreateSafeBrowsingDatabase(
317 bool enable_download_protection
,
318 bool enable_client_side_whitelist
,
319 bool enable_download_whitelist
,
320 bool enable_extension_blacklist
,
321 bool enable_side_effect_free_whitelist
,
322 bool enable_ip_blacklist
) override
{
323 return new SafeBrowsingDatabaseNew(
324 new SafeBrowsingStoreFile
,
325 enable_download_protection
? new SafeBrowsingStoreFile
: NULL
,
326 enable_client_side_whitelist
? new SafeBrowsingStoreFile
: NULL
,
327 enable_download_whitelist
? new SafeBrowsingStoreFile
: NULL
,
328 enable_extension_blacklist
? new SafeBrowsingStoreFile
: NULL
,
329 enable_side_effect_free_whitelist
? new SafeBrowsingStoreFile
: NULL
,
330 enable_ip_blacklist
? new SafeBrowsingStoreFile
: NULL
);
333 SafeBrowsingDatabaseFactoryImpl() { }
336 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl
);
340 SafeBrowsingDatabaseFactory
* SafeBrowsingDatabase::factory_
= NULL
;
342 // Factory method, non-thread safe. Caller has to make sure this s called
343 // on SafeBrowsing Thread.
344 // TODO(shess): There's no need for a factory any longer. Convert
345 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
346 // callers just construct things directly.
347 SafeBrowsingDatabase
* SafeBrowsingDatabase::Create(
348 bool enable_download_protection
,
349 bool enable_client_side_whitelist
,
350 bool enable_download_whitelist
,
351 bool enable_extension_blacklist
,
352 bool enable_side_effect_free_whitelist
,
353 bool enable_ip_blacklist
) {
355 factory_
= new SafeBrowsingDatabaseFactoryImpl();
356 return factory_
->CreateSafeBrowsingDatabase(
357 enable_download_protection
,
358 enable_client_side_whitelist
,
359 enable_download_whitelist
,
360 enable_extension_blacklist
,
361 enable_side_effect_free_whitelist
,
362 enable_ip_blacklist
);
365 SafeBrowsingDatabase::~SafeBrowsingDatabase() {
369 base::FilePath
SafeBrowsingDatabase::BrowseDBFilename(
370 const base::FilePath
& db_base_filename
) {
371 return base::FilePath(db_base_filename
.value() + kBrowseDBFile
);
375 base::FilePath
SafeBrowsingDatabase::DownloadDBFilename(
376 const base::FilePath
& db_base_filename
) {
377 return base::FilePath(db_base_filename
.value() + kDownloadDBFile
);
381 base::FilePath
SafeBrowsingDatabase::BloomFilterForFilename(
382 const base::FilePath
& db_filename
) {
383 return base::FilePath(db_filename
.value() + kBloomFilterFile
);
387 base::FilePath
SafeBrowsingDatabase::PrefixSetForFilename(
388 const base::FilePath
& db_filename
) {
389 return base::FilePath(db_filename
.value() + kPrefixSetFile
);
393 base::FilePath
SafeBrowsingDatabase::CsdWhitelistDBFilename(
394 const base::FilePath
& db_filename
) {
395 return base::FilePath(db_filename
.value() + kCsdWhitelistDBFile
);
399 base::FilePath
SafeBrowsingDatabase::DownloadWhitelistDBFilename(
400 const base::FilePath
& db_filename
) {
401 return base::FilePath(db_filename
.value() + kDownloadWhitelistDBFile
);
405 base::FilePath
SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
406 const base::FilePath
& db_filename
) {
407 return base::FilePath(db_filename
.value() + kExtensionBlacklistDBFile
);
411 base::FilePath
SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
412 const base::FilePath
& db_filename
) {
413 return base::FilePath(db_filename
.value() + kSideEffectFreeWhitelistDBFile
);
417 base::FilePath
SafeBrowsingDatabase::IpBlacklistDBFilename(
418 const base::FilePath
& db_filename
) {
419 return base::FilePath(db_filename
.value() + kIPBlacklistDBFile
);
422 SafeBrowsingStore
* SafeBrowsingDatabaseNew::GetStore(const int list_id
) {
423 if (list_id
== safe_browsing_util::PHISH
||
424 list_id
== safe_browsing_util::MALWARE
) {
425 return browse_store_
.get();
426 } else if (list_id
== safe_browsing_util::BINURL
) {
427 return download_store_
.get();
428 } else if (list_id
== safe_browsing_util::CSDWHITELIST
) {
429 return csd_whitelist_store_
.get();
430 } else if (list_id
== safe_browsing_util::DOWNLOADWHITELIST
) {
431 return download_whitelist_store_
.get();
432 } else if (list_id
== safe_browsing_util::EXTENSIONBLACKLIST
) {
433 return extension_blacklist_store_
.get();
434 } else if (list_id
== safe_browsing_util::SIDEEFFECTFREEWHITELIST
) {
435 return side_effect_free_whitelist_store_
.get();
436 } else if (list_id
== safe_browsing_util::IPBLACKLIST
) {
437 return ip_blacklist_store_
.get();
443 void SafeBrowsingDatabase::RecordFailure(FailureType failure_type
) {
444 UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type
,
445 FAILURE_DATABASE_MAX
);
448 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
449 : creation_loop_(base::MessageLoop::current()),
450 browse_store_(new SafeBrowsingStoreFile
),
451 corruption_detected_(false),
452 change_detected_(false),
453 reset_factory_(this) {
454 DCHECK(browse_store_
.get());
455 DCHECK(!download_store_
.get());
456 DCHECK(!csd_whitelist_store_
.get());
457 DCHECK(!download_whitelist_store_
.get());
458 DCHECK(!extension_blacklist_store_
.get());
459 DCHECK(!side_effect_free_whitelist_store_
.get());
460 DCHECK(!ip_blacklist_store_
.get());
463 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
464 SafeBrowsingStore
* browse_store
,
465 SafeBrowsingStore
* download_store
,
466 SafeBrowsingStore
* csd_whitelist_store
,
467 SafeBrowsingStore
* download_whitelist_store
,
468 SafeBrowsingStore
* extension_blacklist_store
,
469 SafeBrowsingStore
* side_effect_free_whitelist_store
,
470 SafeBrowsingStore
* ip_blacklist_store
)
471 : creation_loop_(base::MessageLoop::current()),
472 browse_store_(browse_store
),
473 download_store_(download_store
),
474 csd_whitelist_store_(csd_whitelist_store
),
475 download_whitelist_store_(download_whitelist_store
),
476 extension_blacklist_store_(extension_blacklist_store
),
477 side_effect_free_whitelist_store_(side_effect_free_whitelist_store
),
478 ip_blacklist_store_(ip_blacklist_store
),
479 corruption_detected_(false),
480 reset_factory_(this) {
481 DCHECK(browse_store_
.get());
484 SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
485 // The DCHECK is disabled due to crbug.com/338486 .
486 // DCHECK_EQ(creation_loop_, base::MessageLoop::current());
489 void SafeBrowsingDatabaseNew::Init(const base::FilePath
& filename_base
) {
490 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
492 // This should not be run multiple times.
493 DCHECK(filename_base_
.empty());
495 filename_base_
= filename_base
;
497 // TODO(shess): The various stores are really only necessary while doing
498 // updates, or when querying a store directly (see |ContainsDownloadUrl()|).
499 // The store variables are also tested to see if a list is enabled. Perhaps
500 // the stores could be refactored into an update object so that they are only
501 // live in memory while being actively used. The sense of enabled probably
502 // belongs in protocol_manager or database_manager.
505 BrowseDBFilename(filename_base_
),
506 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
507 base::Unretained(this)));
510 // NOTE: There is no need to grab the lock in this function, since
511 // until it returns, there are no pointers to this class on other
512 // threads. Then again, that means there is no possibility of
513 // contention on the lock...
514 base::AutoLock
locked(lookup_lock_
);
515 browse_gethash_cache_
.clear();
519 if (download_store_
.get()) {
520 download_store_
->Init(
521 DownloadDBFilename(filename_base_
),
522 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
523 base::Unretained(this)));
526 if (csd_whitelist_store_
.get()) {
527 csd_whitelist_store_
->Init(
528 CsdWhitelistDBFilename(filename_base_
),
529 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
530 base::Unretained(this)));
532 std::vector
<SBAddFullHash
> full_hashes
;
533 if (csd_whitelist_store_
->GetAddFullHashes(&full_hashes
)) {
534 LoadWhitelist(full_hashes
, &csd_whitelist_
);
536 WhitelistEverything(&csd_whitelist_
);
539 WhitelistEverything(&csd_whitelist_
); // Just to be safe.
542 if (download_whitelist_store_
.get()) {
543 download_whitelist_store_
->Init(
544 DownloadWhitelistDBFilename(filename_base_
),
545 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
546 base::Unretained(this)));
548 std::vector
<SBAddFullHash
> full_hashes
;
549 if (download_whitelist_store_
->GetAddFullHashes(&full_hashes
)) {
550 LoadWhitelist(full_hashes
, &download_whitelist_
);
552 WhitelistEverything(&download_whitelist_
);
555 WhitelistEverything(&download_whitelist_
); // Just to be safe.
558 if (extension_blacklist_store_
.get()) {
559 extension_blacklist_store_
->Init(
560 ExtensionBlacklistDBFilename(filename_base_
),
561 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
562 base::Unretained(this)));
565 if (side_effect_free_whitelist_store_
.get()) {
566 const base::FilePath side_effect_free_whitelist_filename
=
567 SideEffectFreeWhitelistDBFilename(filename_base_
);
568 const base::FilePath side_effect_free_whitelist_prefix_set_filename
=
569 PrefixSetForFilename(side_effect_free_whitelist_filename
);
570 side_effect_free_whitelist_store_
->Init(
571 side_effect_free_whitelist_filename
,
572 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
573 base::Unretained(this)));
575 // Only use the prefix set if database is present and non-empty.
576 if (GetFileSizeOrZero(side_effect_free_whitelist_filename
)) {
577 const base::TimeTicks before
= base::TimeTicks::Now();
578 side_effect_free_whitelist_prefix_set_
=
579 safe_browsing::PrefixSet::LoadFile(
580 side_effect_free_whitelist_prefix_set_filename
);
581 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
582 base::TimeTicks::Now() - before
);
583 if (!side_effect_free_whitelist_prefix_set_
.get())
584 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ
);
587 // Delete any files of the side-effect free sidelist that may be around
588 // from when it was previously enabled.
589 SafeBrowsingStoreFile::DeleteStore(
590 SideEffectFreeWhitelistDBFilename(filename_base_
));
592 PrefixSetForFilename(SideEffectFreeWhitelistDBFilename(filename_base_
)),
596 if (ip_blacklist_store_
.get()) {
597 ip_blacklist_store_
->Init(
598 IpBlacklistDBFilename(filename_base_
),
599 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase
,
600 base::Unretained(this)));
602 std::vector
<SBAddFullHash
> full_hashes
;
603 if (ip_blacklist_store_
->GetAddFullHashes(&full_hashes
)) {
604 LoadIpBlacklist(full_hashes
);
606 LoadIpBlacklist(std::vector
<SBAddFullHash
>()); // Clear the list.
611 bool SafeBrowsingDatabaseNew::ResetDatabase() {
612 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
614 // Delete files on disk.
615 // TODO(shess): Hard to see where one might want to delete without a
616 // reset. Perhaps inline |Delete()|?
620 // Reset objects in memory.
622 base::AutoLock
locked(lookup_lock_
);
623 browse_gethash_cache_
.clear();
624 browse_prefix_set_
.reset();
625 side_effect_free_whitelist_prefix_set_
.reset();
626 ip_blacklist_
.clear();
628 // Wants to acquire the lock itself.
629 WhitelistEverything(&csd_whitelist_
);
630 WhitelistEverything(&download_whitelist_
);
634 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
636 std::vector
<SBPrefix
>* prefix_hits
,
637 std::vector
<SBFullHashResult
>* cache_hits
) {
638 // Clear the results first.
639 prefix_hits
->clear();
642 std::vector
<SBFullHash
> full_hashes
;
643 BrowseFullHashesToCheck(url
, false, &full_hashes
);
644 if (full_hashes
.empty())
647 return ContainsBrowseUrlHashes(full_hashes
, prefix_hits
, cache_hits
);
650 bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashes(
651 const std::vector
<SBFullHash
>& full_hashes
,
652 std::vector
<SBPrefix
>* prefix_hits
,
653 std::vector
<SBFullHashResult
>* cache_hits
) {
654 // Used to determine cache expiration.
655 const base::Time now
= base::Time::Now();
657 // This function is called on the I/O thread, prevent changes to
658 // filter and caches.
659 base::AutoLock
locked(lookup_lock_
);
661 // |browse_prefix_set_| is empty until it is either read from disk, or the
662 // first update populates it. Bail out without a hit if not yet
664 if (!browse_prefix_set_
.get())
667 for (size_t i
= 0; i
< full_hashes
.size(); ++i
) {
668 if (!GetCachedFullHash(&browse_gethash_cache_
,
672 // No valid cached result, check the database.
673 if (browse_prefix_set_
->Exists(full_hashes
[i
]))
674 prefix_hits
->push_back(full_hashes
[i
].prefix
);
678 // Multiple full hashes could share prefix, remove duplicates.
679 std::sort(prefix_hits
->begin(), prefix_hits
->end());
680 prefix_hits
->erase(std::unique(prefix_hits
->begin(), prefix_hits
->end()),
683 return !prefix_hits
->empty() || !cache_hits
->empty();
686 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
687 const std::vector
<GURL
>& urls
,
688 std::vector
<SBPrefix
>* prefix_hits
) {
689 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
691 // Ignore this check when download checking is not enabled.
692 if (!download_store_
.get())
695 std::vector
<SBPrefix
> prefixes
;
696 GetDownloadUrlPrefixes(urls
, &prefixes
);
697 return MatchAddPrefixes(download_store_
.get(),
698 safe_browsing_util::BINURL
% 2,
703 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL
& url
) {
704 // This method is theoretically thread-safe but we expect all calls to
705 // originate from the IO thread.
706 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
707 std::vector
<SBFullHash
> full_hashes
;
708 BrowseFullHashesToCheck(url
, true, &full_hashes
);
709 return ContainsWhitelistedHashes(csd_whitelist_
, full_hashes
);
712 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL
& url
) {
713 std::vector
<SBFullHash
> full_hashes
;
714 BrowseFullHashesToCheck(url
, true, &full_hashes
);
715 return ContainsWhitelistedHashes(download_whitelist_
, full_hashes
);
718 bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
719 const std::vector
<SBPrefix
>& prefixes
,
720 std::vector
<SBPrefix
>* prefix_hits
) {
721 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
722 if (!extension_blacklist_store_
)
725 return MatchAddPrefixes(extension_blacklist_store_
.get(),
726 safe_browsing_util::EXTENSIONBLACKLIST
% 2,
731 bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
736 safe_browsing_util::CanonicalizeUrl(url
, &host
, &path
, &query
);
737 std::string url_to_check
= host
+ path
;
739 url_to_check
+= "?" + query
;
740 SBFullHash full_hash
= SBFullHashForString(url_to_check
);
742 // This function can be called on any thread, so lock against any changes
743 base::AutoLock
locked(lookup_lock_
);
745 // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
746 // from disk, or the first update populates it. Bail out without a hit if
747 // not yet available.
748 if (!side_effect_free_whitelist_prefix_set_
.get())
751 return side_effect_free_whitelist_prefix_set_
->Exists(full_hash
);
754 bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string
& ip_address
) {
755 net::IPAddressNumber ip_number
;
756 if (!net::ParseIPLiteralToNumber(ip_address
, &ip_number
))
758 if (ip_number
.size() == net::kIPv4AddressSize
)
759 ip_number
= net::ConvertIPv4NumberToIPv6Number(ip_number
);
760 if (ip_number
.size() != net::kIPv6AddressSize
)
761 return false; // better safe than sorry.
763 // This function can be called from any thread.
764 base::AutoLock
locked(lookup_lock_
);
765 for (IPBlacklist::const_iterator it
= ip_blacklist_
.begin();
766 it
!= ip_blacklist_
.end();
768 const std::string
& mask
= it
->first
;
769 DCHECK_EQ(mask
.size(), ip_number
.size());
770 std::string
subnet(net::kIPv6AddressSize
, '\0');
771 for (size_t i
= 0; i
< net::kIPv6AddressSize
; ++i
) {
772 subnet
[i
] = ip_number
[i
] & mask
[i
];
774 const std::string hash
= base::SHA1HashString(subnet
);
775 DVLOG(2) << "Lookup Malware IP: "
776 << " ip:" << ip_address
777 << " mask:" << base::HexEncode(mask
.data(), mask
.size())
778 << " subnet:" << base::HexEncode(subnet
.data(), subnet
.size())
779 << " hash:" << base::HexEncode(hash
.data(), hash
.size());
780 if (it
->second
.count(hash
) > 0) {
787 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
788 const std::string
& str
) {
789 std::vector
<SBFullHash
> hashes
;
790 hashes
.push_back(SBFullHashForString(str
));
791 return ContainsWhitelistedHashes(download_whitelist_
, hashes
);
794 bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
795 const SBWhitelist
& whitelist
,
796 const std::vector
<SBFullHash
>& hashes
) {
797 base::AutoLock
l(lookup_lock_
);
798 if (whitelist
.second
)
800 for (std::vector
<SBFullHash
>::const_iterator it
= hashes
.begin();
801 it
!= hashes
.end(); ++it
) {
802 if (std::binary_search(whitelist
.first
.begin(), whitelist
.first
.end(),
803 *it
, SBFullHashLess
)) {
810 // Helper to insert add-chunk entries.
811 void SafeBrowsingDatabaseNew::InsertAddChunk(
812 SafeBrowsingStore
* store
,
813 const safe_browsing_util::ListType list_id
,
814 const SBChunkData
& chunk_data
) {
815 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
818 // The server can give us a chunk that we already have because
819 // it's part of a range. Don't add it again.
820 const int chunk_id
= chunk_data
.ChunkNumber();
821 const int encoded_chunk_id
= EncodeChunkId(chunk_id
, list_id
);
822 if (store
->CheckAddChunk(encoded_chunk_id
))
825 store
->SetAddChunk(encoded_chunk_id
);
826 if (chunk_data
.IsPrefix()) {
827 const size_t c
= chunk_data
.PrefixCount();
828 for (size_t i
= 0; i
< c
; ++i
) {
829 STATS_COUNTER("SB.PrefixAdd", 1);
830 store
->WriteAddPrefix(encoded_chunk_id
, chunk_data
.PrefixAt(i
));
833 const size_t c
= chunk_data
.FullHashCount();
834 for (size_t i
= 0; i
< c
; ++i
) {
835 STATS_COUNTER("SB.PrefixAddFull", 1);
836 store
->WriteAddHash(encoded_chunk_id
, chunk_data
.FullHashAt(i
));
841 // Helper to insert sub-chunk entries.
842 void SafeBrowsingDatabaseNew::InsertSubChunk(
843 SafeBrowsingStore
* store
,
844 const safe_browsing_util::ListType list_id
,
845 const SBChunkData
& chunk_data
) {
846 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
849 // The server can give us a chunk that we already have because
850 // it's part of a range. Don't add it again.
851 const int chunk_id
= chunk_data
.ChunkNumber();
852 const int encoded_chunk_id
= EncodeChunkId(chunk_id
, list_id
);
853 if (store
->CheckSubChunk(encoded_chunk_id
))
856 store
->SetSubChunk(encoded_chunk_id
);
857 if (chunk_data
.IsPrefix()) {
858 const size_t c
= chunk_data
.PrefixCount();
859 for (size_t i
= 0; i
< c
; ++i
) {
860 STATS_COUNTER("SB.PrefixSub", 1);
861 const int add_chunk_id
= chunk_data
.AddChunkNumberAt(i
);
862 const int encoded_add_chunk_id
= EncodeChunkId(add_chunk_id
, list_id
);
863 store
->WriteSubPrefix(encoded_chunk_id
, encoded_add_chunk_id
,
864 chunk_data
.PrefixAt(i
));
867 const size_t c
= chunk_data
.FullHashCount();
868 for (size_t i
= 0; i
< c
; ++i
) {
869 STATS_COUNTER("SB.PrefixSubFull", 1);
870 const int add_chunk_id
= chunk_data
.AddChunkNumberAt(i
);
871 const int encoded_add_chunk_id
= EncodeChunkId(add_chunk_id
, list_id
);
872 store
->WriteSubHash(encoded_chunk_id
, encoded_add_chunk_id
,
873 chunk_data
.FullHashAt(i
));
// Inserts |chunks| into the store that GetStore() returns for the list named
// |list_name|.  Add chunks are routed to InsertAddChunk() and sub chunks to
// InsertSubChunk(); |change_detected_| is set, FinishChunk() is called on the
// store, and the elapsed time is reported to the "SB2.ChunkInsert" histogram.
// The guard on |corruption_detected_| / an empty |chunks| presumably returns
// early, but its body is not visible here.
// NOTE(review): embedded line numbers 884-885, 893-894, 899 and 905-908 are
// absent from this extract, so the guard body, whatever follows GetStore(),
// the statement preceding the loop and the tail of the if/else chain cannot
// be seen -- verify against the full file.
878 void SafeBrowsingDatabaseNew::InsertChunks(
879 const std::string
& list_name
,
880 const std::vector
<SBChunkData
*>& chunks
) {
881 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
883 if (corruption_detected_
|| chunks
.empty())
886 const base::TimeTicks before
= base::TimeTicks::Now();
888 // TODO(shess): The caller should just pass list_id.
889 const safe_browsing_util::ListType list_id
=
890 safe_browsing_util::GetListId(list_name
);
892 SafeBrowsingStore
* store
= GetStore(list_id
);
895 change_detected_
= true;
897 // TODO(shess): I believe that the list is always add or sub. Can this use
898 // that productively?
900 for (size_t i
= 0; i
< chunks
.size(); ++i
) {
901 if (chunks
[i
]->IsAdd()) {
902 InsertAddChunk(store
, list_id
, *chunks
[i
]);
903 } else if (chunks
[i
]->IsSub()) {
904 InsertSubChunk(store
, list_id
, *chunks
[i
]);
909 store
->FinishChunk();
911 UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before
);
// Deletes the chunk ranges listed in |chunk_deletes| from the store selected
// by the list name of the first entry (chunk_deletes.front().list_name).
// Each entry's |chunk_del| ranges are expanded with RangesToChunks(), every
// chunk number is encoded with EncodeChunkId(), and DeleteSubChunk() or
// DeleteAddChunk() is invoked on the store depending on |is_sub_del|.
// NOTE(review): only the first entry's list name is consulted; presumably
// all entries belong to the same list -- confirm with callers.
// NOTE(review): embedded line numbers 919-920, 926-927, 937 and 939-942 are
// absent from this extract, so the guard body, whatever follows GetStore(),
// the branch between the two Delete*Chunk() calls and the closing braces
// cannot be seen -- verify against the full file.
914 void SafeBrowsingDatabaseNew::DeleteChunks(
915 const std::vector
<SBChunkDelete
>& chunk_deletes
) {
916 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
918 if (corruption_detected_
|| chunk_deletes
.empty())
921 const std::string
& list_name
= chunk_deletes
.front().list_name
;
922 const safe_browsing_util::ListType list_id
=
923 safe_browsing_util::GetListId(list_name
);
925 SafeBrowsingStore
* store
= GetStore(list_id
);
928 change_detected_
= true;
930 for (size_t i
= 0; i
< chunk_deletes
.size(); ++i
) {
931 std::vector
<int> chunk_numbers
;
932 RangesToChunks(chunk_deletes
[i
].chunk_del
, &chunk_numbers
);
933 for (size_t j
= 0; j
< chunk_numbers
.size(); ++j
) {
934 const int encoded_chunk_id
= EncodeChunkId(chunk_numbers
[j
], list_id
);
935 if (chunk_deletes
[i
].is_sub_del
)
936 store
->DeleteSubChunk(encoded_chunk_id
);
938 store
->DeleteAddChunk(encoded_chunk_id
);
943 void SafeBrowsingDatabaseNew::CacheHashResults(
944 const std::vector
<SBPrefix
>& prefixes
,
945 const std::vector
<SBFullHashResult
>& full_hits
,
946 const base::TimeDelta
& cache_lifetime
) {
947 const base::Time expire_after
= base::Time::Now() + cache_lifetime
;
949 // This is called on the I/O thread, lock against updates.
950 base::AutoLock
locked(lookup_lock_
);
952 // Create or reset all cached results for these prefixes.
953 for (size_t i
= 0; i
< prefixes
.size(); ++i
) {
954 browse_gethash_cache_
[prefixes
[i
]] = SBCachedFullHashResult(expire_after
);
957 // Insert any fullhash hits. Note that there may be one, multiple, or no
958 // fullhashes for any given entry in |prefixes|.
959 for (size_t i
= 0; i
< full_hits
.size(); ++i
) {
960 const SBPrefix prefix
= full_hits
[i
].hash
.prefix
;
961 browse_gethash_cache_
[prefix
].full_hashes
.push_back(full_hits
[i
]);
// Begins an update transaction on every store that exists and collects the
// chunk ranges currently held for each list into |lists|.
// BeginUpdate() is attempted on the browse store and then on each optional
// store; a failure records a store-specific failure code and calls
// HandleCorruptDatabase().  Afterwards the cached full-hash results are
// cleared under |lookup_lock_|, the per-list chunk ranges are gathered, and
// both |corruption_detected_| and |change_detected_| are reset to false.
// NOTE(review): the embedded line numbers skip 968-969, the lines right after
// every HandleCorruptDatabase() call (974-976, 980-982, ... 1013-1016),
// 1021-1022, 1026-1027 and 1052-1054.  The return statements (including the
// final result), the scope around the AutoLock and the last argument of
// UpdateChunkRangesForLists() are therefore not visible -- verify against the
// full file.
965 bool SafeBrowsingDatabaseNew::UpdateStarted(
966 std::vector
<SBListChunkRanges
>* lists
) {
967 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
970 // If |BeginUpdate()| fails, reset the database.
971 if (!browse_store_
->BeginUpdate()) {
972 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN
);
973 HandleCorruptDatabase();
977 if (download_store_
.get() && !download_store_
->BeginUpdate()) {
978 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN
);
979 HandleCorruptDatabase();
983 if (csd_whitelist_store_
.get() && !csd_whitelist_store_
->BeginUpdate()) {
984 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN
);
985 HandleCorruptDatabase();
989 if (download_whitelist_store_
.get() &&
990 !download_whitelist_store_
->BeginUpdate()) {
991 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN
);
992 HandleCorruptDatabase();
996 if (extension_blacklist_store_
&&
997 !extension_blacklist_store_
->BeginUpdate()) {
998 RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN
);
999 HandleCorruptDatabase();
1003 if (side_effect_free_whitelist_store_
&&
1004 !side_effect_free_whitelist_store_
->BeginUpdate()) {
1005 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN
);
1006 HandleCorruptDatabase();
1010 if (ip_blacklist_store_
&& !ip_blacklist_store_
->BeginUpdate()) {
1011 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN
);
1012 HandleCorruptDatabase();
1017 base::AutoLock
locked(lookup_lock_
);
1018 // Cached fullhash results must be cleared on every database update (whether
1019 // successful or not.)
1020 browse_gethash_cache_
.clear();
1023 UpdateChunkRangesForLists(browse_store_
.get(),
1024 safe_browsing_util::kMalwareList
,
1025 safe_browsing_util::kPhishingList
,
1028 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1029 // deprecated. Code to delete the list from the store shows ~15k hits/day as
1030 // of Feb 2014, so it has been removed. Everything _should_ be resilient to
1031 // extra data of that sort.
1032 UpdateChunkRangesForList(download_store_
.get(),
1033 safe_browsing_util::kBinUrlList
, lists
);
1035 UpdateChunkRangesForList(csd_whitelist_store_
.get(),
1036 safe_browsing_util::kCsdWhiteList
, lists
);
1038 UpdateChunkRangesForList(download_whitelist_store_
.get(),
1039 safe_browsing_util::kDownloadWhiteList
, lists
);
1041 UpdateChunkRangesForList(extension_blacklist_store_
.get(),
1042 safe_browsing_util::kExtensionBlacklist
, lists
);
1044 UpdateChunkRangesForList(side_effect_free_whitelist_store_
.get(),
1045 safe_browsing_util::kSideEffectFreeWhitelist
, lists
);
1047 UpdateChunkRangesForList(ip_blacklist_store_
.get(),
1048 safe_browsing_util::kIPBlacklist
, lists
);
1050 corruption_detected_
= false;
1051 change_detected_
= false;
// Completes or abandons the update transaction on every store.
// When |update_succeeded| is false, each existing store's CheckValidity() is
// consulted and a corrupt store is reported through DLOG(ERROR).  When the
// update failed or |change_detected_| is false, CancelUpdate() is called on
// every existing store (an empty but successful update also logs a zero
// "SB2.DatabaseUpdateKilobytes" sample).  The remaining code finishes each
// store through its Update*Store() helper and records size histograms; the
// cancel branch presumably returns before reaching it.
// NOTE(review): the embedded line numbers skip 1063, 1087-1089, 1092-1094,
// 1096-1097, 1118-1120, 1128-1129, 1133, 1145-1146 and others.  The body of
// the |corruption_detected_| guard, the end of the cancel branch, the third
// argument of the first UpdateWhitelistStore() call and the closing braces
// are therefore not visible -- verify against the full file.
1055 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded
) {
1056 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1058 // The update may have failed due to corrupt storage (for instance,
1059 // an excessive number of invalid add_chunks and sub_chunks).
1060 // Double-check that the databases are valid.
1061 // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1062 // sections would allow throwing a corruption error in
1064 if (!update_succeeded
) {
1065 if (!browse_store_
->CheckValidity())
1066 DLOG(ERROR
) << "Safe-browsing browse database corrupt.";
1068 if (download_store_
.get() && !download_store_
->CheckValidity())
1069 DLOG(ERROR
) << "Safe-browsing download database corrupt.";
1071 if (csd_whitelist_store_
.get() && !csd_whitelist_store_
->CheckValidity())
1072 DLOG(ERROR
) << "Safe-browsing csd whitelist database corrupt.";
1074 if (download_whitelist_store_
.get() &&
1075 !download_whitelist_store_
->CheckValidity()) {
1076 DLOG(ERROR
) << "Safe-browsing download whitelist database corrupt.";
1079 if (extension_blacklist_store_
&&
1080 !extension_blacklist_store_
->CheckValidity()) {
1081 DLOG(ERROR
) << "Safe-browsing extension blacklist database corrupt.";
1084 if (side_effect_free_whitelist_store_
&&
1085 !side_effect_free_whitelist_store_
->CheckValidity()) {
1086 DLOG(ERROR
) << "Safe-browsing side-effect free whitelist database "
1090 if (ip_blacklist_store_
&& !ip_blacklist_store_
->CheckValidity()) {
1091 DLOG(ERROR
) << "Safe-browsing IP blacklist database corrupt.";
1095 if (corruption_detected_
)
1098 // Unroll the transaction if there was a protocol error or if the
1099 // transaction was empty. This will leave the prefix set, the
1100 // pending hashes, and the prefix miss cache in place.
1101 if (!update_succeeded
|| !change_detected_
) {
1102 // Track empty updates to answer questions at http://crbug.com/72216 .
1103 if (update_succeeded
&& !change_detected_
)
1104 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1105 browse_store_
->CancelUpdate();
1106 if (download_store_
.get())
1107 download_store_
->CancelUpdate();
1108 if (csd_whitelist_store_
.get())
1109 csd_whitelist_store_
->CancelUpdate();
1110 if (download_whitelist_store_
.get())
1111 download_whitelist_store_
->CancelUpdate();
1112 if (extension_blacklist_store_
)
1113 extension_blacklist_store_
->CancelUpdate();
1114 if (side_effect_free_whitelist_store_
)
1115 side_effect_free_whitelist_store_
->CancelUpdate();
1116 if (ip_blacklist_store_
)
1117 ip_blacklist_store_
->CancelUpdate();
1121 if (download_store_
) {
1122 int64 size_bytes
= UpdateHashPrefixStore(
1123 DownloadDBFilename(filename_base_
),
1124 download_store_
.get(),
1125 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH
);
1126 UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1127 static_cast<int>(size_bytes
/ 1024));
1130 UpdateBrowseStore();
1131 UpdateWhitelistStore(CsdWhitelistDBFilename(filename_base_
),
1132 csd_whitelist_store_
.get(),
1134 UpdateWhitelistStore(DownloadWhitelistDBFilename(filename_base_
),
1135 download_whitelist_store_
.get(),
1136 &download_whitelist_
);
1138 if (extension_blacklist_store_
) {
1139 int64 size_bytes
= UpdateHashPrefixStore(
1140 ExtensionBlacklistDBFilename(filename_base_
),
1141 extension_blacklist_store_
.get(),
1142 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH
);
1143 UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1144 static_cast<int>(size_bytes
/ 1024));
1147 if (side_effect_free_whitelist_store_
)
1148 UpdateSideEffectFreeWhitelistStore();
1150 if (ip_blacklist_store_
)
1151 UpdateIpBlacklistStore();
// Finishes the update on a whitelist |store| and loads the resulting full
// hashes into |whitelist| via LoadWhitelist().  If FinishUpdate() fails, a
// FAILURE_WHITELIST_DATABASE_UPDATE_FINISH failure is recorded and
// WhitelistEverything() is applied to |whitelist|.  When OS_MACOSX is
// defined, SetFileBackupExclusion() is called on |store_filename|.
// NOTE(review): the embedded line numbers skip 1158-1160, 1168-1170 and
// 1173-1174, so whatever precedes the builder (possibly a check on |store|),
// the end of the failure branch and the #endif are not visible -- verify
// against the full file.
1154 void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1155 const base::FilePath
& store_filename
,
1156 SafeBrowsingStore
* store
,
1157 SBWhitelist
* whitelist
) {
1161 // Note: |builder| will not be empty. The current data store implementation
1162 // stores all full-length hashes as both full and prefix hashes.
1163 safe_browsing::PrefixSetBuilder builder
;
1164 std::vector
<SBAddFullHash
> full_hashes
;
1165 if (!store
->FinishUpdate(&builder
, &full_hashes
)) {
1166 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH
);
1167 WhitelistEverything(whitelist
);
1171 #if defined(OS_MACOSX)
1172 base::mac::SetFileBackupExclusion(store_filename
);
1175 LoadWhitelist(full_hashes
, whitelist
);
1178 int64
SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1179 const base::FilePath
& store_filename
,
1180 SafeBrowsingStore
* store
,
1181 FailureType failure_type
) {
1182 // These results are not used after this call. Simply ignore the
1183 // returned value after FinishUpdate(...).
1184 safe_browsing::PrefixSetBuilder builder
;
1185 std::vector
<SBAddFullHash
> add_full_hashes_result
;
1187 if (!store
->FinishUpdate(&builder
, &add_full_hashes_result
))
1188 RecordFailure(failure_type
);
1190 #if defined(OS_MACOSX)
1191 base::mac::SetFileBackupExclusion(store_filename
);
1194 return GetFileSizeOrZero(store_filename
);
// Finishes the browse store update, builds a new prefix set from the result
// and swaps it into |browse_prefix_set_| while holding |lookup_lock_|.
// Along the way it records the filter build time, the process IO counters
// (only when GetIOCounters() succeeds both before and after) and the browse
// database file size as histograms.  A failed FinishUpdate() records
// FAILURE_BROWSE_DATABASE_UPDATE_FINISH.
// NOTE(review): the SB2.Build*Kilobytes histograms cast the byte delta to int
// before dividing by 1024, whereas the file-size histogram below divides
// first; a large delta could be truncated by the cast -- worth confirming the
// width of the base::IoCounters fields.
// NOTE(review): the embedded line numbers skip 1204, 1207-1209, 1224-1226,
// 1236, 1239-1240, 1245-1246 and 1270-1272.  The #else/#endif of the
// CreateProcessMetrics() selection, the end of the failure branch, the scope
// around the AutoLock and the statement that follows the "Persist the prefix
// set to disk" comment are therefore not visible -- verify against the full
// file.
1197 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1198 // Measure the amount of IO during the filter build.
1199 base::IoCounters io_before
, io_after
;
1200 base::ProcessHandle handle
= base::GetCurrentProcessHandle();
1201 scoped_ptr
<base::ProcessMetrics
> metric(
1202 #if !defined(OS_MACOSX)
1203 base::ProcessMetrics::CreateProcessMetrics(handle
)
1205 // Getting stats only for the current process is enough, so NULL is fine.
1206 base::ProcessMetrics::CreateProcessMetrics(handle
, NULL
)
1210 // IoCounters are currently not supported on Mac, and may not be
1211 // available for Linux, so we check the result and only show IO
1212 // stats if they are available.
1213 const bool got_counters
= metric
->GetIOCounters(&io_before
);
1215 const base::TimeTicks before
= base::TimeTicks::Now();
1217 // TODO(shess): Perhaps refactor to let builder accumulate full hashes on the
1218 // fly? Other clients use the SBAddFullHash vector, but AFAICT they only use
1219 // the SBFullHash portion. It would need an accessor on PrefixSet.
1220 safe_browsing::PrefixSetBuilder builder
;
1221 std::vector
<SBAddFullHash
> add_full_hashes
;
1222 if (!browse_store_
->FinishUpdate(&builder
, &add_full_hashes
)) {
1223 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH
);
1227 std::vector
<SBFullHash
> full_hash_results
;
1228 for (size_t i
= 0; i
< add_full_hashes
.size(); ++i
) {
1229 full_hash_results
.push_back(add_full_hashes
[i
].full_hash
);
1232 scoped_ptr
<safe_browsing::PrefixSet
>
1233 prefix_set(builder
.GetPrefixSet(full_hash_results
));
1235 // Swap in the newly built filter.
1237 base::AutoLock
locked(lookup_lock_
);
1238 browse_prefix_set_
.swap(prefix_set
);
1241 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before
);
1243 // Persist the prefix set to disk. Since only this thread changes
1244 // |browse_prefix_set_|, there is no need to lock.
1247 // Gather statistics.
1248 if (got_counters
&& metric
->GetIOCounters(&io_after
)) {
1249 UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1250 static_cast<int>(io_after
.ReadTransferCount
-
1251 io_before
.ReadTransferCount
) / 1024);
1252 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1253 static_cast<int>(io_after
.WriteTransferCount
-
1254 io_before
.WriteTransferCount
) / 1024);
1255 UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1256 static_cast<int>(io_after
.ReadOperationCount
-
1257 io_before
.ReadOperationCount
));
1258 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1259 static_cast<int>(io_after
.WriteOperationCount
-
1260 io_before
.WriteOperationCount
));
1263 const base::FilePath browse_filename
= BrowseDBFilename(filename_base_
);
1264 const int64 file_size
= GetFileSizeOrZero(browse_filename
);
1265 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1266 static_cast<int>(file_size
/ 1024));
1268 #if defined(OS_MACOSX)
1269 base::mac::SetFileBackupExclusion(browse_filename
);
// Finishes the side-effect free whitelist store update, builds a prefix set
// without full hashes (GetPrefixSetNoHashes()) and swaps it into
// |side_effect_free_whitelist_prefix_set_| while holding |lookup_lock_|.
// The new prefix set is then written to its file, and the write time plus the
// sizes of the prefix-set file and the database file are recorded as
// histograms.  When OS_MACOSX is defined both files are passed to
// SetFileBackupExclusion().
// NOTE(review): the embedded line numbers skip 1280-1281, 1286, 1289-1290,
// 1300-1301 and 1317-1319.  Whether the FinishUpdate() failure branch returns
// early, the scope around the AutoLock, and the condition guarding
// RecordFailure(..._PREFIX_SET_WRITE) (presumably a test of |write_ok|) are
// therefore not visible -- verify against the full file.
1273 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1274 safe_browsing::PrefixSetBuilder builder
;
1275 std::vector
<SBAddFullHash
> add_full_hashes_result
;
1277 if (!side_effect_free_whitelist_store_
->FinishUpdate(
1278 &builder
, &add_full_hashes_result
)) {
1279 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH
);
1282 scoped_ptr
<safe_browsing::PrefixSet
>
1283 prefix_set(builder
.GetPrefixSetNoHashes());
1285 // Swap in the newly built prefix set.
1287 base::AutoLock
locked(lookup_lock_
);
1288 side_effect_free_whitelist_prefix_set_
.swap(prefix_set
);
1291 const base::FilePath side_effect_free_whitelist_filename
=
1292 SideEffectFreeWhitelistDBFilename(filename_base_
);
1293 const base::FilePath side_effect_free_whitelist_prefix_set_filename
=
1294 PrefixSetForFilename(side_effect_free_whitelist_filename
);
1295 const base::TimeTicks before
= base::TimeTicks::Now();
1296 const bool write_ok
= side_effect_free_whitelist_prefix_set_
->WriteFile(
1297 side_effect_free_whitelist_prefix_set_filename
);
1298 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1299 base::TimeTicks::Now() - before
);
1302 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE
);
1304 // Gather statistics.
1305 int64 file_size
= GetFileSizeOrZero(
1306 side_effect_free_whitelist_prefix_set_filename
);
1307 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1308 static_cast<int>(file_size
/ 1024));
1309 file_size
= GetFileSizeOrZero(side_effect_free_whitelist_filename
);
1310 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1311 static_cast<int>(file_size
/ 1024));
1313 #if defined(OS_MACOSX)
1314 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename
);
1315 base::mac::SetFileBackupExclusion(
1316 side_effect_free_whitelist_prefix_set_filename
);
1320 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1321 // Note: prefixes will not be empty. The current data store implementation
1322 // stores all full-length hashes as both full and prefix hashes.
1323 safe_browsing::PrefixSetBuilder builder
;
1324 std::vector
<SBAddFullHash
> full_hashes
;
1325 if (!ip_blacklist_store_
->FinishUpdate(&builder
, &full_hashes
)) {
1326 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH
);
1327 LoadIpBlacklist(std::vector
<SBAddFullHash
>()); // Clear the list.
1331 #if defined(OS_MACOSX)
1332 base::mac::SetFileBackupExclusion(IpBlacklistDBFilename(filename_base_
));
1335 LoadIpBlacklist(full_hashes
);
1338 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1339 // Reset the database after the current task has unwound (but only
1340 // reset once within the scope of a given task).
1341 if (!reset_factory_
.HasWeakPtrs()) {
1342 RecordFailure(FAILURE_DATABASE_CORRUPT
);
1343 base::MessageLoop::current()->PostTask(FROM_HERE
,
1344 base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase
,
1345 reset_factory_
.GetWeakPtr()));
// Deferred corruption handler (bound and posted by HandleCorruptDatabase()).
// Records FAILURE_DATABASE_CORRUPT_HANDLER, sets |corruption_detected_| so
// that updating stops, and raises a DLOG(FATAL) describing the reset.
// NOTE(review): the embedded line numbers skip 1352-1353 and 1359-1360.  The
// comment below refers to ResetDatabase(), so a reset call presumably sits in
// the first gap -- verify against the full file.
1349 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1350 RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER
);
1351 corruption_detected_
= true; // Stop updating the database.
1354 // NOTE(shess): ResetDatabase() should remove the corruption, so this should
1355 // only happen once. If you are here because you are hitting this after a
1356 // restart, then I would be very interested in working with you to figure out
1357 // what is happening, since it may affect real users.
1358 DLOG(FATAL
) << "SafeBrowsing database was corrupt and reset";
1361 // TODO(shess): I'm not clear why this code doesn't have any
1362 // real error-handling.
1363 void SafeBrowsingDatabaseNew::LoadPrefixSet() {
1364 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1365 DCHECK(!filename_base_
.empty());
1367 const base::FilePath browse_filename
= BrowseDBFilename(filename_base_
);
1368 const base::FilePath browse_prefix_set_filename
=
1369 PrefixSetForFilename(browse_filename
);
1371 // Only use the prefix set if database is present and non-empty.
1372 if (!GetFileSizeOrZero(browse_filename
))
1375 // Cleanup any stale bloom filter (no longer used).
1376 // TODO(shess): Track existence to drive removal of this code?
1377 const base::FilePath bloom_filter_filename
=
1378 BloomFilterForFilename(browse_filename
);
1379 base::DeleteFile(bloom_filter_filename
, false);
1381 const base::TimeTicks before
= base::TimeTicks::Now();
1382 browse_prefix_set_
= safe_browsing::PrefixSet::LoadFile(
1383 browse_prefix_set_filename
);
1384 UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before
);
1386 if (!browse_prefix_set_
.get())
1387 RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ
);
1390 bool SafeBrowsingDatabaseNew::Delete() {
1391 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1392 DCHECK(!filename_base_
.empty());
1394 // TODO(shess): This is a mess. SafeBrowsingFileStore::Delete() closes the
1395 // store before calling DeleteStore(). DeleteStore() deletes transient files
1396 // in addition to the main file. Probably all of these should be converted to
1397 // a helper which calls Delete() if the store exists, else DeleteStore() on
1398 // the generated filename.
1400 // TODO(shess): Determine if the histograms are useful in any way. I cannot
1401 // recall any action taken as a result of their values, in which case it might
1402 // make more sense to histogram an overall thumbs-up/-down and just dig deeper
1403 // if something looks wrong.
1405 const bool r1
= browse_store_
->Delete();
1407 RecordFailure(FAILURE_DATABASE_STORE_DELETE
);
1409 const bool r2
= download_store_
.get() ? download_store_
->Delete() : true;
1411 RecordFailure(FAILURE_DATABASE_STORE_DELETE
);
1413 const bool r3
= csd_whitelist_store_
.get() ?
1414 csd_whitelist_store_
->Delete() : true;
1416 RecordFailure(FAILURE_DATABASE_STORE_DELETE
);
1418 const bool r4
= download_whitelist_store_
.get() ?
1419 download_whitelist_store_
->Delete() : true;
1421 RecordFailure(FAILURE_DATABASE_STORE_DELETE
);
1423 const base::FilePath browse_filename
= BrowseDBFilename(filename_base_
);
1424 const base::FilePath bloom_filter_filename
=
1425 BloomFilterForFilename(browse_filename
);
1426 const bool r5
= base::DeleteFile(bloom_filter_filename
, false);
1428 RecordFailure(FAILURE_DATABASE_FILTER_DELETE
);
1430 const base::FilePath browse_prefix_set_filename
=
1431 PrefixSetForFilename(browse_filename
);
1432 const bool r6
= base::DeleteFile(browse_prefix_set_filename
, false);
1434 RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE
);
1436 const base::FilePath extension_blacklist_filename
=
1437 ExtensionBlacklistDBFilename(filename_base_
);
1438 const bool r7
= base::DeleteFile(extension_blacklist_filename
, false);
1440 RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE
);
1442 const base::FilePath side_effect_free_whitelist_filename
=
1443 SideEffectFreeWhitelistDBFilename(filename_base_
);
1444 const bool r8
= base::DeleteFile(side_effect_free_whitelist_filename
,
1447 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE
);
1449 const base::FilePath side_effect_free_whitelist_prefix_set_filename
=
1450 PrefixSetForFilename(side_effect_free_whitelist_filename
);
1451 const bool r9
= base::DeleteFile(
1452 side_effect_free_whitelist_prefix_set_filename
,
1455 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE
);
1457 const bool r10
= base::DeleteFile(IpBlacklistDBFilename(filename_base_
),
1460 RecordFailure(FAILURE_IP_BLACKLIST_DELETE
);
1462 return r1
&& r2
&& r3
&& r4
&& r5
&& r6
&& r7
&& r8
&& r9
&& r10
;
1465 void SafeBrowsingDatabaseNew::WritePrefixSet() {
1466 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1468 if (!browse_prefix_set_
.get())
1471 const base::FilePath browse_filename
= BrowseDBFilename(filename_base_
);
1472 const base::FilePath browse_prefix_set_filename
=
1473 PrefixSetForFilename(browse_filename
);
1475 const base::TimeTicks before
= base::TimeTicks::Now();
1476 const bool write_ok
= browse_prefix_set_
->WriteFile(
1477 browse_prefix_set_filename
);
1478 UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before
);
1480 const int64 file_size
= GetFileSizeOrZero(browse_prefix_set_filename
);
1481 UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1482 static_cast<int>(file_size
/ 1024));
1485 RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE
);
1487 #if defined(OS_MACOSX)
1488 base::mac::SetFileBackupExclusion(browse_prefix_set_filename
);
1492 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist
* whitelist
) {
1493 base::AutoLock
locked(lookup_lock_
);
1494 whitelist
->second
= true;
1495 whitelist
->first
.clear();
1498 void SafeBrowsingDatabaseNew::LoadWhitelist(
1499 const std::vector
<SBAddFullHash
>& full_hashes
,
1500 SBWhitelist
* whitelist
) {
1501 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1502 if (full_hashes
.size() > kMaxWhitelistSize
) {
1503 WhitelistEverything(whitelist
);
1507 std::vector
<SBFullHash
> new_whitelist
;
1508 new_whitelist
.reserve(full_hashes
.size());
1509 for (std::vector
<SBAddFullHash
>::const_iterator it
= full_hashes
.begin();
1510 it
!= full_hashes
.end(); ++it
) {
1511 new_whitelist
.push_back(it
->full_hash
);
1513 std::sort(new_whitelist
.begin(), new_whitelist
.end(), SBFullHashLess
);
1515 SBFullHash kill_switch
= SBFullHashForString(kWhitelistKillSwitchUrl
);
1516 if (std::binary_search(new_whitelist
.begin(), new_whitelist
.end(),
1517 kill_switch
, SBFullHashLess
)) {
1518 // The kill switch is whitelisted hence we whitelist all URLs.
1519 WhitelistEverything(whitelist
);
1521 base::AutoLock
locked(lookup_lock_
);
1522 whitelist
->second
= false;
1523 whitelist
->first
.swap(new_whitelist
);
1527 void SafeBrowsingDatabaseNew::LoadIpBlacklist(
1528 const std::vector
<SBAddFullHash
>& full_hashes
) {
1529 DCHECK_EQ(creation_loop_
, base::MessageLoop::current());
1530 IPBlacklist new_blacklist
;
1531 for (std::vector
<SBAddFullHash
>::const_iterator it
= full_hashes
.begin();
1532 it
!= full_hashes
.end();
1534 const char* full_hash
= it
->full_hash
.full_hash
;
1535 DCHECK_EQ(crypto::kSHA256Length
, arraysize(it
->full_hash
.full_hash
));
1536 // The format of the IP blacklist is:
1537 // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes.
1538 std::string
hashed_ip_prefix(full_hash
, base::kSHA1Length
);
1539 size_t prefix_size
= static_cast<uint8
>(full_hash
[base::kSHA1Length
]);
1540 if (prefix_size
> kMaxIpPrefixSize
|| prefix_size
< kMinIpPrefixSize
) {
1541 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID
);
1542 new_blacklist
.clear(); // Load empty blacklist.
1546 // We precompute the mask for the given subnet size to speed up lookups.
1547 // Basically we need to create a 16B long string which has the highest
1548 // |size| bits sets to one.
1549 std::string
mask(net::kIPv6AddressSize
, '\0');
1550 mask
.replace(0, prefix_size
/ 8, prefix_size
/ 8, '\xFF');
1551 if ((prefix_size
% 8) != 0) {
1552 mask
[prefix_size
/ 8] = 0xFF << (8 - (prefix_size
% 8));
1554 DVLOG(2) << "Inserting malicious IP: "
1555 << " raw:" << base::HexEncode(full_hash
, crypto::kSHA256Length
)
1556 << " mask:" << base::HexEncode(mask
.data(), mask
.size())
1557 << " prefix_size:" << prefix_size
1558 << " hashed_ip:" << base::HexEncode(hashed_ip_prefix
.data(),
1559 hashed_ip_prefix
.size());
1560 new_blacklist
[mask
].insert(hashed_ip_prefix
);
1563 base::AutoLock
locked(lookup_lock_
);
1564 ip_blacklist_
.swap(new_blacklist
);
1567 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1568 SBFullHash malware_kill_switch
= SBFullHashForString(kMalwareIPKillSwitchUrl
);
1569 std::vector
<SBFullHash
> full_hashes
;
1570 full_hashes
.push_back(malware_kill_switch
);
1571 return ContainsWhitelistedHashes(csd_whitelist_
, full_hashes
);
1574 bool SafeBrowsingDatabaseNew::IsCsdWhitelistKillSwitchOn() {
1575 return csd_whitelist_
.second
;