Update mojo surfaces bindings and mojo/cc/ glue
[chromium-blink-merge.git] / chrome / browser / safe_browsing / safe_browsing_database.h
blob09b22aad278ca106371391540a2e262d1f231faa
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
8 #include <map>
9 #include <set>
10 #include <string>
11 #include <vector>
13 #include "base/containers/hash_tables.h"
14 #include "base/files/file_path.h"
15 #include "base/gtest_prod_util.h"
16 #include "base/memory/scoped_ptr.h"
17 #include "base/memory/weak_ptr.h"
18 #include "base/synchronization/lock.h"
19 #include "base/time/time.h"
20 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
22 namespace base {
23 class MessageLoop;
26 namespace safe_browsing {
27 class PrefixSet;
30 class GURL;
31 class SafeBrowsingDatabase;
33 // Factory for creating SafeBrowsingDatabase. Tests implement this factory
34 // to create fake Databases for testing.
35 class SafeBrowsingDatabaseFactory {
36 public:
37 SafeBrowsingDatabaseFactory() { }
38 virtual ~SafeBrowsingDatabaseFactory() { }
// Pure virtual creation hook: concrete factories override this to build the
// SafeBrowsingDatabase they want handed out. The six flags are in the same
// order as the parameters of SafeBrowsingDatabase::Create(); note that the
// fifth one is spelled |side_effect_free_whitelist| there but
// |enable_side_effect_free_whitelist| here.
// NOTE(review): a raw pointer is returned; ownership presumably passes to the
// caller -- confirm against the implementation.
39 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
40 bool enable_download_protection,
41 bool enable_client_side_whitelist,
42 bool enable_download_whitelist,
43 bool enable_extension_blacklist,
44 bool enable_side_effect_free_whitelist,
45 bool enable_ip_blacklist) = 0;
46 private:
47 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory);
50 // Encapsulates the on-disk databases used for safebrowsing. There are
51 // four databases: browse, download, download whitelist and
52 // client-side detection (csd) whitelist databases. The browse database contains
53 // information about phishing and malware urls. The download database contains
54 // URLs for bad binaries (e.g. those containing viruses) and hashes of
55 // these downloaded contents. The download whitelist contains whitelisted
56 // download hosting sites as well as whitelisted binary signing certificates
57 // etc. The csd whitelist database contains URLs that will never be considered
58 // as phishing by the client-side phishing detection. These on-disk databases
59 // are shared among all profiles, as they don't contain user-specific data. This
60 // object is not thread-safe, i.e. all its methods should be used on the same
61 // thread that it was created on.
62 class SafeBrowsingDatabase {
63 public:
64 // Factory method for obtaining a SafeBrowsingDatabase implementation.
65 // It is not thread safe.
66 // |enable_download_protection| is used to control the download database
67 // feature.
68 // |enable_client_side_whitelist| is used to control the csd whitelist
69 // database feature.
70 // |enable_download_whitelist| is used to control the download whitelist
71 // database feature.
72 // |enable_ip_blacklist| is used to control the csd malware IP blacklist
73 // database feature.
// NOTE(review): |enable_extension_blacklist| and |side_effect_free_whitelist|
// are not described above; by analogy with the other flags they presumably
// control the extension blacklist and side-effect free whitelist database
// features -- confirm against the implementation.
74 static SafeBrowsingDatabase* Create(bool enable_download_protection,
75 bool enable_client_side_whitelist,
76 bool enable_download_whitelist,
77 bool enable_extension_blacklist,
78 bool side_effect_free_whitelist,
79 bool enable_ip_blacklist);
81 // Makes the passed |factory| the factory used to instantiate
82 // a SafeBrowsingDatabase. This is used for tests.
83 static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) {
84 factory_ = factory;
87 virtual ~SafeBrowsingDatabase();
89 // Initializes the database with the given filename.
90 virtual void Init(const base::FilePath& filename) = 0;
92 // Deletes the current database and creates a new one.
93 virtual bool ResetDatabase() = 0;
95 // Returns false if |url| is not in the browse database or already was cached
96 // as a miss. If it returns true, |prefix_hits| contains matching hash
97 // prefixes which had no cached results and |cache_hits| contains any matching
98 // cached gethash results. This function is safe to call from any thread.
99 virtual bool ContainsBrowseUrl(
100 const GURL& url,
101 std::vector<SBPrefix>* prefix_hits,
102 std::vector<SBFullHashResult>* cache_hits) = 0;
104 // Returns false if none of |urls| are in Download database. If it returns
105 // true, |prefix_hits| should contain the prefixes for the URLs that were in
106 // the database. This function could ONLY be accessed from creation thread.
107 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
108 std::vector<SBPrefix>* prefix_hits) = 0;
110 // Returns false if |url| is not on the client-side phishing detection
111 // whitelist. Otherwise, this function returns true. Note: the whitelist
112 // only contains full-length hashes so we don't return any prefix hit.
113 // This function should only be called from the IO thread.
114 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0;
116 // The download whitelist is used for two purposes: a white-domain list of
117 // sites that are considered to host only harmless binaries as well as a
118 // whitelist of arbitrary strings such as hashed certificate authorities that
119 // are considered to be trusted. The two methods below let you lookup
120 // the whitelist either for a URL or an arbitrary string. These methods will
121 // return false if no match is found and true otherwise.
122 // This function could ONLY be accessed from the IO thread.
123 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) = 0;
124 virtual bool ContainsDownloadWhitelistedString(const std::string& str) = 0;
126 // Populates |prefix_hits| with any prefixes in |prefixes| that have matches
127 // in the database.
129 // This function can ONLY be accessed from the creation thread.
130 virtual bool ContainsExtensionPrefixes(
131 const std::vector<SBPrefix>& prefixes,
132 std::vector<SBPrefix>* prefix_hits) = 0;
134 // Returns false unless the hash of |url| is on the side-effect free
135 // whitelist.
136 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) = 0;
138 // Returns true iff the given IP is currently on the csd malware IP blacklist.
139 virtual bool ContainsMalwareIP(const std::string& ip_address) = 0;
141 // A database transaction should look like:
143 // std::vector<SBListChunkRanges> lists;
144 // if (db.UpdateStarted(&lists)) {
145 // // Do something with |lists|.
147 // // Process add/sub commands.
148 // db.InsertChunks(list_name, chunks);
150 // // Process adddel/subdel commands.
151 // db.DeleteChunks(chunks_deletes);
153 // // If passed true, processes the collected chunk info and
154 // // rebuilds the filter. If passed false, rolls everything
155 // // back.
156 // db.UpdateFinished(success);
157 // }
159 // If UpdateStarted() returns true, the caller MUST eventually call
160 // UpdateFinished(). If it returns false, the caller MUST NOT call
161 // the other functions.
162 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0;
163 virtual void InsertChunks(const std::string& list_name,
164 const std::vector<SBChunkData*>& chunks) = 0;
165 virtual void DeleteChunks(
166 const std::vector<SBChunkDelete>& chunk_deletes) = 0;
167 virtual void UpdateFinished(bool update_succeeded) = 0;
169 // Store the results of a GetHash response. In the case of empty results, we
170 // cache the prefixes until the next update so that we don't have to issue
171 // further GetHash requests we know will be empty.
172 virtual void CacheHashResults(
173 const std::vector<SBPrefix>& prefixes,
174 const std::vector<SBFullHashResult>& full_hits,
175 const base::TimeDelta& cache_lifetime) = 0;
177 // Returns true if the malware IP blacklisting killswitch URL is present
178 // in the csd whitelist.
179 virtual bool IsMalwareIPMatchKillSwitchOn() = 0;
181 // Returns true if the whitelist killswitch URL is present in the csd
182 // whitelist.
183 virtual bool IsCsdWhitelistKillSwitchOn() = 0;
185 // The name of the bloom-filter file for the given database file.
186 // NOTE(shess): OBSOLETE. Present for deleting stale files.
187 static base::FilePath BloomFilterForFilename(
188 const base::FilePath& db_filename);
190 // The name of the prefix set file for the given database file.
191 static base::FilePath PrefixSetForFilename(const base::FilePath& db_filename);
193 // Filename for malware and phishing URL database.
194 static base::FilePath BrowseDBFilename(
195 const base::FilePath& db_base_filename);
197 // Filename for download URL and download binary hash database.
198 static base::FilePath DownloadDBFilename(
199 const base::FilePath& db_base_filename);
201 // Filename for client-side phishing detection whitelist database.
202 static base::FilePath CsdWhitelistDBFilename(
203 const base::FilePath& csd_whitelist_base_filename);
205 // Filename for download whitelist database.
206 static base::FilePath DownloadWhitelistDBFilename(
207 const base::FilePath& download_whitelist_base_filename);
209 // Filename for extension blacklist database.
210 static base::FilePath ExtensionBlacklistDBFilename(
211 const base::FilePath& extension_blacklist_base_filename);
213 // Filename for side-effect free whitelist database.
214 static base::FilePath SideEffectFreeWhitelistDBFilename(
215 const base::FilePath& side_effect_free_whitelist_base_filename);
217 // Filename for the csd malware IP blacklist database.
218 static base::FilePath IpBlacklistDBFilename(
219 const base::FilePath& ip_blacklist_base_filename);
221 // Enumerate failures for histogramming purposes. DO NOT CHANGE THE
222 // ORDERING OF THESE VALUES.
223 enum FailureType {
224 FAILURE_DATABASE_CORRUPT,
225 FAILURE_DATABASE_CORRUPT_HANDLER,
226 FAILURE_BROWSE_DATABASE_UPDATE_BEGIN,
227 FAILURE_BROWSE_DATABASE_UPDATE_FINISH,
228 FAILURE_DATABASE_FILTER_MISSING_OBSOLETE,
229 FAILURE_DATABASE_FILTER_READ_OBSOLETE,
230 FAILURE_DATABASE_FILTER_WRITE_OBSOLETE,
231 FAILURE_DATABASE_FILTER_DELETE,
232 FAILURE_DATABASE_STORE_MISSING,
233 FAILURE_DATABASE_STORE_DELETE,
234 FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN,
235 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH,
236 FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN,
237 FAILURE_WHITELIST_DATABASE_UPDATE_FINISH,
238 FAILURE_BROWSE_PREFIX_SET_MISSING,
239 FAILURE_BROWSE_PREFIX_SET_READ,
240 FAILURE_BROWSE_PREFIX_SET_WRITE,
241 FAILURE_BROWSE_PREFIX_SET_DELETE,
242 FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN,
243 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH,
244 FAILURE_EXTENSION_BLACKLIST_DELETE,
245 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN,
246 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH,
247 FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE,
248 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ,
249 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE,
250 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE,
251 FAILURE_IP_BLACKLIST_UPDATE_BEGIN,
252 FAILURE_IP_BLACKLIST_UPDATE_FINISH,
253 FAILURE_IP_BLACKLIST_UPDATE_INVALID,
254 FAILURE_IP_BLACKLIST_DELETE,
256 // Memory space for histograms is determined by the max. ALWAYS
257 // ADD NEW VALUES BEFORE THIS ONE.
258 FAILURE_DATABASE_MAX
261 static void RecordFailure(FailureType failure_type);
263 private:
264 // The factory used to instantiate a SafeBrowsingDatabase object.
265 // Useful for tests, so they can provide their own implementation of
266 // SafeBrowsingDatabase.
267 static SafeBrowsingDatabaseFactory* factory_;
270 class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase {
271 public:
272 // Create a database with a browse, download, download whitelist and
273 // csd whitelist store objects. Takes ownership of all the store objects.
274 // When |download_store| is NULL, the database will ignore any operations
275 // related to downloads (url hashes and binary hashes). The same is true for
276 // the |csd_whitelist_store|, |download_whitelist_store| and
277 // |ip_blacklist_store|.
278 SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store,
279 SafeBrowsingStore* download_store,
280 SafeBrowsingStore* csd_whitelist_store,
281 SafeBrowsingStore* download_whitelist_store,
282 SafeBrowsingStore* extension_blacklist_store,
283 SafeBrowsingStore* side_effect_free_whitelist_store,
284 SafeBrowsingStore* ip_blacklist_store);
286 // Create a database with a browse store. This is a legacy interface that
287 // uses Sqlite.
288 SafeBrowsingDatabaseNew();
290 virtual ~SafeBrowsingDatabaseNew();
292 // Implement SafeBrowsingDatabase interface.
293 virtual void Init(const base::FilePath& filename) OVERRIDE;
294 virtual bool ResetDatabase() OVERRIDE;
295 virtual bool ContainsBrowseUrl(
296 const GURL& url,
297 std::vector<SBPrefix>* prefix_hits,
298 std::vector<SBFullHashResult>* cache_hits) OVERRIDE;
299 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
300 std::vector<SBPrefix>* prefix_hits) OVERRIDE;
301 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) OVERRIDE;
302 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) OVERRIDE;
303 virtual bool ContainsDownloadWhitelistedString(
304 const std::string& str) OVERRIDE;
305 virtual bool ContainsExtensionPrefixes(
306 const std::vector<SBPrefix>& prefixes,
307 std::vector<SBPrefix>* prefix_hits) OVERRIDE;
308 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) OVERRIDE;
309 virtual bool ContainsMalwareIP(const std::string& ip_address) OVERRIDE;
310 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) OVERRIDE;
311 virtual void InsertChunks(const std::string& list_name,
312 const std::vector<SBChunkData*>& chunks) OVERRIDE;
313 virtual void DeleteChunks(
314 const std::vector<SBChunkDelete>& chunk_deletes) OVERRIDE;
315 virtual void UpdateFinished(bool update_succeeded) OVERRIDE;
316 virtual void CacheHashResults(
317 const std::vector<SBPrefix>& prefixes,
318 const std::vector<SBFullHashResult>& full_hits,
319 const base::TimeDelta& cache_lifetime) OVERRIDE;
321 // Returns the value of malware_kill_switch_;
322 virtual bool IsMalwareIPMatchKillSwitchOn() OVERRIDE;
324 // Returns true if the CSD whitelist has everything whitelisted.
325 virtual bool IsCsdWhitelistKillSwitchOn() OVERRIDE;
327 private:
328 friend class SafeBrowsingDatabaseTest;
329 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, HashCaching);
330 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, CachedFullMiss);
331 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, CachedPrefixHitFullMiss);
332 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, BrowseFullHashMatching);
333 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest,
334 BrowseFullHashAndPrefixMatching);
336 // A SafeBrowsing whitelist contains a list of whitelisted full-hashes (stored
337 // in a sorted vector) as well as a boolean flag indicating whether all
338 // lookups in the whitelist should be considered matches for safety.
339 typedef std::pair<std::vector<SBFullHash>, bool> SBWhitelist;
341 // This map holds a csd malware IP blacklist which maps a prefix mask
342 // to a set of hashed blacklisted IP prefixes. Each IP prefix is a hashed
343 // IPv6 IP prefix using SHA-1.
344 typedef std::map<std::string, base::hash_set<std::string> > IPBlacklist;
346 // Helper for ContainsBrowseUrl, exposed for testing.
347 bool ContainsBrowseUrlHashes(const std::vector<SBFullHash>& full_hashes,
348 std::vector<SBPrefix>* prefix_hits,
349 std::vector<SBFullHashResult>* cache_hits);
351 // Returns true if the whitelist is disabled or if any of the given hashes
352 // matches the whitelist.
353 bool ContainsWhitelistedHashes(const SBWhitelist& whitelist,
354 const std::vector<SBFullHash>& hashes);
356 // Return the browse_store_, download_store_, download_whitelist_store_ or
357 // csd_whitelist_store_ based on list_id.
358 SafeBrowsingStore* GetStore(int list_id);
360 // Deletes the files on disk.
361 bool Delete();
363 // Load the prefix set off disk, if available.
364 void LoadPrefixSet();
366 // Writes the current prefix set to disk.
367 void WritePrefixSet();
369 // Loads the given full-length hashes to the given whitelist. If the number
370 // of hashes is too large or if the kill switch URL is on the whitelist
371 // we will whitelist everything.
372 void LoadWhitelist(const std::vector<SBAddFullHash>& full_hashes,
373 SBWhitelist* whitelist);
375 // Call this method if an error occurred with the given whitelist. This will
376 // cause all lookups to the whitelist to return true.
377 void WhitelistEverything(SBWhitelist* whitelist);
379 // Parses the IP blacklist from the given full-length hashes.
380 void LoadIpBlacklist(const std::vector<SBAddFullHash>& full_hashes);
382 // Helpers for handling database corruption.
383 // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets
384 // |corruption_detected_|, |HandleCorruptDatabase()| posts
385 // |OnHandleCorruptDatabase()| to the current thread, to be run
386 // after the current task completes.
387 // TODO(shess): Wire things up to entirely abort the update
388 // transaction when this happens.
389 void HandleCorruptDatabase();
390 void OnHandleCorruptDatabase();
392 // Helpers for InsertChunks().
393 void InsertAddChunk(SafeBrowsingStore* store,
394 safe_browsing_util::ListType list_id,
395 const SBChunkData& chunk);
396 void InsertSubChunk(SafeBrowsingStore* store,
397 safe_browsing_util::ListType list_id,
398 const SBChunkData& chunk);
400 // Returns the size in bytes of the store after the update.
401 int64 UpdateHashPrefixStore(const base::FilePath& store_filename,
402 SafeBrowsingStore* store,
403 FailureType failure_type);
404 void UpdateBrowseStore();
405 void UpdateSideEffectFreeWhitelistStore();
406 void UpdateWhitelistStore(const base::FilePath& store_filename,
407 SafeBrowsingStore* store,
408 SBWhitelist* whitelist);
409 void UpdateIpBlacklistStore();
411 // Used to verify that various calls are made from the thread the
412 // object was created on.
413 base::MessageLoop* creation_loop_;
415 // Lock for protecting access to variables that may be used on the IO thread.
416 // This includes |prefix_set_|, |browse_gethash_cache_|, |csd_whitelist_|.
417 base::Lock lookup_lock_;
419 // The base filename passed to Init(), used to generate the store and prefix
420 // set filenames used to store data on disk.
421 base::FilePath filename_base_;
423 // Underlying persistent store for chunk data.
424 // For browsing related (phishing and malware URLs) chunks and prefixes.
425 scoped_ptr<SafeBrowsingStore> browse_store_;
427 // For download related (download URL and binary hash) chunks and prefixes.
428 scoped_ptr<SafeBrowsingStore> download_store_;
430 // For the client-side phishing detection whitelist chunks and full-length
431 // hashes. This list only contains 256 bit hashes.
432 scoped_ptr<SafeBrowsingStore> csd_whitelist_store_;
434 // For the download whitelist chunks and full-length hashes. This list only
435 // contains 256 bit hashes.
436 scoped_ptr<SafeBrowsingStore> download_whitelist_store_;
438 // For extension IDs.
439 scoped_ptr<SafeBrowsingStore> extension_blacklist_store_;
441 // For side-effect free whitelist.
442 scoped_ptr<SafeBrowsingStore> side_effect_free_whitelist_store_;
444 // For IP blacklist.
445 scoped_ptr<SafeBrowsingStore> ip_blacklist_store_;
447 SBWhitelist csd_whitelist_;
448 SBWhitelist download_whitelist_;
449 SBWhitelist extension_blacklist_;
451 // The IP blacklist should be small. At most a couple hundred IPs.
452 IPBlacklist ip_blacklist_;
454 // Cache of gethash results for browse store. Entries should not be used if
455 // they are older than their expire_after field. Cached misses will have
456 // empty full_hashes field. Cleared on each update.
457 std::map<SBPrefix, SBCachedFullHashResult> browse_gethash_cache_;
459 // Used to schedule resetting the database because of corruption.
460 base::WeakPtrFactory<SafeBrowsingDatabaseNew> reset_factory_;
462 // Set if corruption is detected during the course of an update.
463 // Causes the update functions to fail with no side effects, until
464 // the next call to |UpdateStarted()|.
465 bool corruption_detected_;
467 // Set to true if any chunks are added or deleted during an update.
468 // Used to optimize away database update.
469 bool change_detected_;
471 // Used to check if a prefix was in the browse database.
472 scoped_ptr<safe_browsing::PrefixSet> browse_prefix_set_;
474 // Used to check if a prefix was in the side-effect free whitelist database.
475 scoped_ptr<safe_browsing::PrefixSet> side_effect_free_whitelist_prefix_set_;
478 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_