1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
13 #include "base/containers/hash_tables.h"
14 #include "base/files/file_path.h"
15 #include "base/gtest_prod_util.h"
16 #include "base/memory/scoped_ptr.h"
17 #include "base/memory/weak_ptr.h"
18 #include "base/synchronization/lock.h"
19 #include "base/time/time.h"
20 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
26 namespace safe_browsing
{
31 class SafeBrowsingDatabase
;
33 // Factory for creating SafeBrowsingDatabase. Tests implement this factory
34 // to create fake Databases for testing.
35 class SafeBrowsingDatabaseFactory
{
// Inline constructor with an empty body.
37 SafeBrowsingDatabaseFactory() { }
// Virtual destructor with an empty body, so that implementations can be
// destroyed through a SafeBrowsingDatabaseFactory pointer.
38 virtual ~SafeBrowsingDatabaseFactory() { }
// Pure virtual creation hook: every concrete factory must implement it.
// Takes one boolean switch per optional feature and returns a pointer to a
// SafeBrowsingDatabase.
// NOTE(review): ownership of the returned pointer is not stated here --
// presumably the caller owns it; confirm against the implementations.
39 virtual SafeBrowsingDatabase
* CreateSafeBrowsingDatabase(
40 bool enable_download_protection
,
41 bool enable_client_side_whitelist
,
42 bool enable_download_whitelist
,
43 bool enable_extension_blacklist
,
44 bool enable_side_effect_free_whitelist
,
45 bool enable_ip_blacklist
) = 0;
// NOTE(review): DISALLOW_COPY_AND_ASSIGN is defined outside this file;
// presumably it makes the factory non-copyable -- confirm.
47 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory
);
50 // Contains full_hash elements which are cached in memory. Differs from
51 // SBAddFullHash in deriving |list_id| from |chunk_id|. Differs from
52 // SBFullHashResult in adding |expire_after| for later expiration.
53 // TODO(shess): Remove/refactor this as part of converting to v2.3 caching.
55 struct SBFullHashCached
{
57 int list_id
; // TODO(shess): Use safe_browsing_util::ListType.
// NOTE(review): presumably the time after which this cached entry is no
// longer valid -- confirm against the code that reads it.
58 base::Time expire_after
;
61 // Encapsulates the on-disk databases used for safebrowsing. There are
62 // four databases: browse, download, download whitelist and
63 // client-side detection (csd) whitelist databases. The browse database contains
64 // information about phishing and malware URLs. The download database contains
65 // URLs for bad binaries (e.g. those containing a virus) and hashes of
66 // these downloaded contents. The download whitelist contains whitelisted
67 // download hosting sites as well as whitelisted binary signing certificates
68 // etc. The csd whitelist database contains URLs that will never be considered
69 // as phishing by the client-side phishing detection. These on-disk databases
70 // are shared among all profiles, as they don't contain user-specific data. This
71 // object is not thread-safe, i.e. all its methods should be used on the same
72 // thread that it was created on, unless a method's comment says otherwise.
73 class SafeBrowsingDatabase
{
75 // Factory method for obtaining a SafeBrowsingDatabase implementation.
76 // It is not thread safe.
77 // |enable_download_protection| is used to control the download database.
79 // |enable_client_side_whitelist| is used to control the csd whitelist.
81 // |enable_download_whitelist| is used to control the download whitelist.
83 // |enable_ip_blacklist| is used to control the csd malware IP blacklist.
// NOTE(review): |enable_extension_blacklist| and |side_effect_free_whitelist|
// are not described above; presumably they control the extension blacklist
// and the side-effect free whitelist respectively -- confirm.
// NOTE(review): the fifth parameter is spelled |side_effect_free_whitelist|
// here but |enable_side_effect_free_whitelist| in
// SafeBrowsingDatabaseFactory::CreateSafeBrowsingDatabase(); consider aligning
// the two names.
85 static SafeBrowsingDatabase
* Create(bool enable_download_protection
,
86 bool enable_client_side_whitelist
,
87 bool enable_download_whitelist
,
88 bool enable_extension_blacklist
,
89 bool side_effect_free_whitelist
,
90 bool enable_ip_blacklist
);
92 // Makes the passed |factory| the factory used to instantiate
93 // a SafeBrowsingDatabase. This is used for tests.
94 static void RegisterFactory(SafeBrowsingDatabaseFactory
* factory
) {
98 virtual ~SafeBrowsingDatabase();
100 // Initializes the database with the given filename.
101 virtual void Init(const base::FilePath
& filename
) = 0;
103 // Deletes the current database and creates a new one.
104 virtual bool ResetDatabase() = 0;
106 // Returns false if |url| is not in the browse database. If it returns true,
107 // then |prefix_hits| contains the list of prefix matches, and |cache_hits|
108 // contains the cached gethash results for those prefixes (if any). This
109 // function is safe to call from threads other than the creation thread.
110 virtual bool ContainsBrowseUrl(
112 std::vector
<SBPrefix
>* prefix_hits
,
113 std::vector
<SBFullHashResult
>* cache_hits
) = 0;
115 // Returns false if none of |urls| are in the download database. If it returns
116 // true, |prefix_hits| should contain the prefixes for the URLs that were in
117 // the database. This function can ONLY be accessed from the creation thread.
118 virtual bool ContainsDownloadUrl(const std::vector
<GURL
>& urls
,
119 std::vector
<SBPrefix
>* prefix_hits
) = 0;
121 // Returns false if |url| is not on the client-side phishing detection
122 // whitelist. Otherwise, this function returns true. Note: the whitelist
123 // only contains full-length hashes so we don't return any prefix hit.
124 // This function should only be called from the IO thread.
125 virtual bool ContainsCsdWhitelistedUrl(const GURL
& url
) = 0;
127 // The download whitelist is used for two purposes: a white-domain list of
128 // sites that are considered to host only harmless binaries as well as a
129 // whitelist of arbitrary strings such as hashed certificate authorities that
130 // are considered to be trusted. The two methods below let you look up
131 // the whitelist either for a URL or an arbitrary string. These methods will
132 // return false if no match is found and true otherwise.
133 // These functions can ONLY be accessed from the IO thread.
134 virtual bool ContainsDownloadWhitelistedUrl(const GURL
& url
) = 0;
135 virtual bool ContainsDownloadWhitelistedString(const std::string
& str
) = 0;
137 // Populates |prefix_hits| with any prefixes in |prefixes| that have matches
// in the database.
140 // This function can ONLY be accessed from the creation thread.
141 virtual bool ContainsExtensionPrefixes(
142 const std::vector
<SBPrefix
>& prefixes
,
143 std::vector
<SBPrefix
>* prefix_hits
) = 0;
145 // Returns false unless the hash of |url| is on the side-effect free
// whitelist.
147 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL
& url
) = 0;
149 // Returns true iff the given IP is currently on the csd malware IP blacklist.
150 virtual bool ContainsMalwareIP(const std::string
& ip_address
) = 0;
152 // A database transaction should look like:
//
154 //   std::vector<SBListChunkRanges> lists;
155 //   if (db.UpdateStarted(&lists)) {
156 //     // Do something with |lists|.
//
158 //     // Process add/sub commands.
159 //     db.InsertChunks(list_name, chunks);
//
161 //     // Process adddel/subdel commands.
162 //     db.DeleteChunks(chunks_deletes);
//
164 //     // If passed true, processes the collected chunk info and
165 //     // rebuilds the filter. If passed false, rolls everything
//     // back.
167 //     db.UpdateFinished(success);
//   }
//
170 // If UpdateStarted() returns true, the caller MUST eventually call
171 // UpdateFinished(). If it returns false, the caller MUST NOT call
172 // the other functions.
173 virtual bool UpdateStarted(std::vector
<SBListChunkRanges
>* lists
) = 0;
174 virtual void InsertChunks(const std::string
& list_name
,
175 const SBChunkList
& chunks
) = 0;
176 virtual void DeleteChunks(
177 const std::vector
<SBChunkDelete
>& chunk_deletes
) = 0;
178 virtual void UpdateFinished(bool update_succeeded
) = 0;
180 // Store the results of a GetHash response. In the case of empty results, we
181 // cache the prefixes until the next update so that we don't have to issue
182 // further GetHash requests we know will be empty.
183 virtual void CacheHashResults(
184 const std::vector
<SBPrefix
>& prefixes
,
185 const std::vector
<SBFullHashResult
>& full_hits
,
186 const base::TimeDelta
& cache_lifetime
) = 0;
188 // Returns true if the malware IP blacklisting killswitch URL is present
189 // in the csd whitelist.
190 virtual bool IsMalwareIPMatchKillSwitchOn() = 0;
192 // Returns true if the whitelist killswitch URL is present in the csd
// whitelist.
194 virtual bool IsCsdWhitelistKillSwitchOn() = 0;
196 // The name of the bloom-filter file for the given database file.
197 // NOTE(shess): OBSOLETE. Present for deleting stale files.
198 static base::FilePath
BloomFilterForFilename(
199 const base::FilePath
& db_filename
);
201 // The name of the prefix set file for the given database file.
202 static base::FilePath
PrefixSetForFilename(const base::FilePath
& db_filename
);
204 // Filename for malware and phishing URL database.
205 static base::FilePath
BrowseDBFilename(
206 const base::FilePath
& db_base_filename
);
208 // Filename for download URL and download binary hash database.
209 static base::FilePath
DownloadDBFilename(
210 const base::FilePath
& db_base_filename
);
212 // Filename for client-side phishing detection whitelist database.
213 static base::FilePath
CsdWhitelistDBFilename(
214 const base::FilePath
& csd_whitelist_base_filename
);
216 // Filename for download whitelist database.
217 static base::FilePath
DownloadWhitelistDBFilename(
218 const base::FilePath
& download_whitelist_base_filename
);
220 // Filename for extension blacklist database.
221 static base::FilePath
ExtensionBlacklistDBFilename(
222 const base::FilePath
& extension_blacklist_base_filename
);
224 // Filename for side-effect free whitelist database.
225 static base::FilePath
SideEffectFreeWhitelistDBFilename(
226 const base::FilePath
& side_effect_free_whitelist_base_filename
);
228 // Filename for the csd malware IP blacklist database.
229 static base::FilePath
IpBlacklistDBFilename(
230 const base::FilePath
& ip_blacklist_base_filename
);
232 // Enumerate failures for histogramming purposes. DO NOT CHANGE THE
233 // ORDERING OF THESE VALUES.
235 FAILURE_DATABASE_CORRUPT
,
236 FAILURE_DATABASE_CORRUPT_HANDLER
,
237 FAILURE_BROWSE_DATABASE_UPDATE_BEGIN
,
238 FAILURE_BROWSE_DATABASE_UPDATE_FINISH
,
239 FAILURE_DATABASE_FILTER_MISSING_OBSOLETE
,
240 FAILURE_DATABASE_FILTER_READ_OBSOLETE
,
241 FAILURE_DATABASE_FILTER_WRITE_OBSOLETE
,
242 FAILURE_DATABASE_FILTER_DELETE
,
243 FAILURE_DATABASE_STORE_MISSING
,
244 FAILURE_DATABASE_STORE_DELETE
,
245 FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN
,
246 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH
,
247 FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN
,
248 FAILURE_WHITELIST_DATABASE_UPDATE_FINISH
,
249 FAILURE_BROWSE_PREFIX_SET_MISSING
,
250 FAILURE_BROWSE_PREFIX_SET_READ
,
251 FAILURE_BROWSE_PREFIX_SET_WRITE
,
252 FAILURE_BROWSE_PREFIX_SET_DELETE
,
253 FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN
,
254 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH
,
255 FAILURE_EXTENSION_BLACKLIST_DELETE
,
256 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN
,
257 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH
,
258 FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE
,
259 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ
,
260 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE
,
261 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE
,
262 FAILURE_IP_BLACKLIST_UPDATE_BEGIN
,
263 FAILURE_IP_BLACKLIST_UPDATE_FINISH
,
264 FAILURE_IP_BLACKLIST_UPDATE_INVALID
,
265 FAILURE_IP_BLACKLIST_DELETE
,
267 // Memory space for histograms is determined by the max. ALWAYS
268 // ADD NEW VALUES BEFORE THIS ONE.
272 static void RecordFailure(FailureType failure_type
);
275 // The factory used to instantiate a SafeBrowsingDatabase object.
276 // Useful for tests, so they can provide their own implementation of
277 // SafeBrowsingDatabase.
278 static SafeBrowsingDatabaseFactory
* factory_
;
281 class SafeBrowsingDatabaseNew
: public SafeBrowsingDatabase
{
283 // Create a database with a browse, download, download whitelist and
284 // csd whitelist store objects. Takes ownership of all the store objects.
285 // When |download_store| is NULL, the database will ignore any operations
286 // related to downloads (URL hashes and binary hashes). The same is true for
287 // the |csd_whitelist_store|, |download_whitelist_store| and
288 // |ip_blacklist_store|.
289 SafeBrowsingDatabaseNew(SafeBrowsingStore
* browse_store
,
290 SafeBrowsingStore
* download_store
,
291 SafeBrowsingStore
* csd_whitelist_store
,
292 SafeBrowsingStore
* download_whitelist_store
,
293 SafeBrowsingStore
* extension_blacklist_store
,
294 SafeBrowsingStore
* side_effect_free_whitelist_store
,
295 SafeBrowsingStore
* ip_blacklist_store
);
297 // Create a database with a browse store. This is a legacy interface.
299 SafeBrowsingDatabaseNew();
301 virtual ~SafeBrowsingDatabaseNew();
303 // Implement SafeBrowsingDatabase interface.
304 virtual void Init(const base::FilePath
& filename
) OVERRIDE
;
305 virtual bool ResetDatabase() OVERRIDE
;
306 virtual bool ContainsBrowseUrl(
308 std::vector
<SBPrefix
>* prefix_hits
,
309 std::vector
<SBFullHashResult
>* cache_hits
) OVERRIDE
;
310 virtual bool ContainsDownloadUrl(const std::vector
<GURL
>& urls
,
311 std::vector
<SBPrefix
>* prefix_hits
) OVERRIDE
;
312 virtual bool ContainsCsdWhitelistedUrl(const GURL
& url
) OVERRIDE
;
313 virtual bool ContainsDownloadWhitelistedUrl(const GURL
& url
) OVERRIDE
;
314 virtual bool ContainsDownloadWhitelistedString(
315 const std::string
& str
) OVERRIDE
;
316 virtual bool ContainsExtensionPrefixes(
317 const std::vector
<SBPrefix
>& prefixes
,
318 std::vector
<SBPrefix
>* prefix_hits
) OVERRIDE
;
319 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL
& url
) OVERRIDE
;
320 virtual bool ContainsMalwareIP(const std::string
& ip_address
) OVERRIDE
;
321 virtual bool UpdateStarted(std::vector
<SBListChunkRanges
>* lists
) OVERRIDE
;
322 virtual void InsertChunks(const std::string
& list_name
,
323 const SBChunkList
& chunks
) OVERRIDE
;
324 virtual void DeleteChunks(
325 const std::vector
<SBChunkDelete
>& chunk_deletes
) OVERRIDE
;
326 virtual void UpdateFinished(bool update_succeeded
) OVERRIDE
;
327 virtual void CacheHashResults(
328 const std::vector
<SBPrefix
>& prefixes
,
329 const std::vector
<SBFullHashResult
>& full_hits
,
330 const base::TimeDelta
& cache_lifetime
) OVERRIDE
;
332 // Returns the value of |malware_kill_switch_|.
333 virtual bool IsMalwareIPMatchKillSwitchOn() OVERRIDE
;
335 // Returns true if the CSD whitelist has everything whitelisted.
336 virtual bool IsCsdWhitelistKillSwitchOn() OVERRIDE
;
339 friend class SafeBrowsingDatabaseTest
;
340 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest
, HashCaching
);
342 // A SafeBrowsing whitelist contains a list of whitelisted full-hashes (stored
343 // in a sorted vector) as well as a boolean flag indicating whether all
344 // lookups in the whitelist should be considered matches for safety.
345 typedef std::pair
<std::vector
<SBFullHash
>, bool> SBWhitelist
;
347 // This map holds a csd malware IP blacklist which maps a prefix mask
348 // to a set of hashed blacklisted IP prefixes. Each IP prefix is a hashed
349 // IPv6 IP prefix using SHA-1.
350 typedef std::map
<std::string
, base::hash_set
<std::string
> > IPBlacklist
;
352 // Returns true if the whitelist is disabled or if any of the given hashes
353 // matches the whitelist.
354 bool ContainsWhitelistedHashes(const SBWhitelist
& whitelist
,
355 const std::vector
<SBFullHash
>& hashes
);
357 // Return the browse_store_, download_store_, download_whitelist_store_ or
358 // csd_whitelist_store_ based on |list_id|.
359 SafeBrowsingStore
* GetStore(int list_id
);
361 // Deletes the files on disk.
364 // Load the prefix set off disk, if available.
365 void LoadPrefixSet();
367 // Writes the current prefix set to disk.
368 void WritePrefixSet();
370 // Loads the given full-length hashes to the given whitelist. If the number
371 // of hashes is too large or if the kill switch URL is on the whitelist
372 // we will whitelist everything.
373 void LoadWhitelist(const std::vector
<SBAddFullHash
>& full_hashes
,
374 SBWhitelist
* whitelist
);
376 // Call this method if an error occurred with the given whitelist. This will
377 // result in all lookups to the whitelist returning true.
378 void WhitelistEverything(SBWhitelist
* whitelist
);
380 // Parses the IP blacklist from the given full-length hashes.
381 void LoadIpBlacklist(const std::vector
<SBAddFullHash
>& full_hashes
);
383 // Helpers for handling database corruption.
384 // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets
385 // |corruption_detected_|, |HandleCorruptDatabase()| posts
386 // |OnHandleCorruptDatabase()| to the current thread, to be run
387 // after the current task completes.
388 // TODO(shess): Wire things up to entirely abort the update
389 // transaction when this happens.
390 void HandleCorruptDatabase();
391 void OnHandleCorruptDatabase();
393 // Helpers for InsertChunks().
394 void InsertAdd(int chunk
, SBPrefix host
, const SBEntry
* entry
, int list_id
);
395 void InsertAddChunks(safe_browsing_util::ListType list_id
,
396 const SBChunkList
& chunks
);
397 void InsertSub(int chunk
, SBPrefix host
, const SBEntry
* entry
, int list_id
);
398 void InsertSubChunks(safe_browsing_util::ListType list_id
,
399 const SBChunkList
& chunks
);
401 // Returns the size in bytes of the store after the update.
402 int64
UpdateHashPrefixStore(const base::FilePath
& store_filename
,
403 SafeBrowsingStore
* store
,
404 FailureType failure_type
);
405 void UpdateBrowseStore();
406 void UpdateSideEffectFreeWhitelistStore();
407 void UpdateWhitelistStore(const base::FilePath
& store_filename
,
408 SafeBrowsingStore
* store
,
409 SBWhitelist
* whitelist
);
410 void UpdateIpBlacklistStore();
412 // Used to verify that various calls are made from the thread the
413 // object was created on.
414 base::MessageLoop
* creation_loop_
;
416 // Lock for protecting access to variables that may be used on the
417 // IO thread. This includes |prefix_set_|, |cached_browse_hashes_|,
418 // |prefix_miss_cache_|, |csd_whitelist_|.
419 base::Lock lookup_lock_
;
421 // Underlying persistent store for chunk data.
422 // For browsing related (phishing and malware URLs) chunks and prefixes.
423 base::FilePath browse_filename_
;
424 scoped_ptr
<SafeBrowsingStore
> browse_store_
;
426 // For download related (download URL and binary hash) chunks and prefixes.
427 base::FilePath download_filename_
;
428 scoped_ptr
<SafeBrowsingStore
> download_store_
;
430 // For the client-side phishing detection whitelist chunks and full-length
431 // hashes. This list only contains 256 bit hashes.
432 base::FilePath csd_whitelist_filename_
;
433 scoped_ptr
<SafeBrowsingStore
> csd_whitelist_store_
;
435 // For the download whitelist chunks and full-length hashes. This list only
436 // contains 256 bit hashes.
437 base::FilePath download_whitelist_filename_
;
438 scoped_ptr
<SafeBrowsingStore
> download_whitelist_store_
;
440 // For extension IDs.
441 base::FilePath extension_blacklist_filename_
;
442 scoped_ptr
<SafeBrowsingStore
> extension_blacklist_store_
;
444 // For side-effect free whitelist.
445 base::FilePath side_effect_free_whitelist_filename_
;
446 scoped_ptr
<SafeBrowsingStore
> side_effect_free_whitelist_store_
;
449 base::FilePath ip_blacklist_filename_
;
450 scoped_ptr
<SafeBrowsingStore
> ip_blacklist_store_
;
452 SBWhitelist csd_whitelist_
;
453 SBWhitelist download_whitelist_
;
454 SBWhitelist extension_blacklist_
;
456 // The IP blacklist should be small. At most a couple hundred IPs.
457 IPBlacklist ip_blacklist_
;
459 // Store items from CacheHashResults(), ordered by hash for efficient
460 // scanning. Discarded on next update.
461 std::vector
<SBFullHashCached
> cached_browse_hashes_
;
463 // Cache of prefixes that returned empty results (no full hash
464 // match) to |CacheHashResults()|. Cached to prevent asking for
465 // them every time. Cleared on next update.
466 std::set
<SBPrefix
> prefix_miss_cache_
;
468 // Used to schedule resetting the database because of corruption.
469 base::WeakPtrFactory
<SafeBrowsingDatabaseNew
> reset_factory_
;
471 // Set if corruption is detected during the course of an update.
472 // Causes the update functions to fail with no side effects, until
473 // the next call to |UpdateStarted()|.
474 bool corruption_detected_
;
476 // Set to true if any chunks are added or deleted during an update.
477 // Used to optimize away database update.
478 bool change_detected_
;
480 // Used to check if a prefix was in the browse database.
481 base::FilePath browse_prefix_set_filename_
;
482 scoped_ptr
<safe_browsing::PrefixSet
> browse_prefix_set_
;
484 // Used to check if a prefix was in the side-effect free whitelist database.
485 base::FilePath side_effect_free_whitelist_prefix_set_filename_
;
486 scoped_ptr
<safe_browsing::PrefixSet
> side_effect_free_whitelist_prefix_set_
;
489 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_