GoogleURLTrackerInfoBarDelegate: Initialize uninitialized member in constructor.
[chromium-blink-merge.git] / chrome / browser / safe_browsing / safe_browsing_database.h
blobf0d23e3fa8254fd73f73f7fd9fff87a8d51cfeb9
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
8 #include <map>
9 #include <set>
10 #include <string>
11 #include <vector>
13 #include "base/containers/hash_tables.h"
14 #include "base/files/file_path.h"
15 #include "base/gtest_prod_util.h"
16 #include "base/memory/scoped_ptr.h"
17 #include "base/memory/weak_ptr.h"
18 #include "base/synchronization/lock.h"
19 #include "base/time/time.h"
20 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
22 namespace base {
23 class MessageLoop;
26 namespace safe_browsing {
27 class PrefixSet;
30 class GURL;
31 class SafeBrowsingDatabase;
33 // Factory for creating SafeBrowsingDatabase. Tests implement this factory
34 // to create fake Databases for testing.
35 class SafeBrowsingDatabaseFactory {
36 public:
37 SafeBrowsingDatabaseFactory() { }
38 virtual ~SafeBrowsingDatabaseFactory() { }
39 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
40 bool enable_download_protection,
41 bool enable_client_side_whitelist,
42 bool enable_download_whitelist,
43 bool enable_extension_blacklist,
44 bool enable_side_effect_free_whitelist,
45 bool enable_ip_blacklist) = 0;
46 private:
47 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory);
50 // Contains full_hash elements which are cached in memory. Differs from
51 // SBAddFullHash in deriving |list_id| from |chunk_id|. Differs from
52 // SBFullHashResult in adding |received| for later expiration.
53 // TODO(shess): Remove/refactor this as part of converting to v2.3 caching
54 // semantics.
55 struct SBFullHashCached {
56 SBFullHash hash;
57 int list_id; // TODO(shess): Use safe_browsing_util::ListType.
58 base::Time expire_after;
61 // Encapsulates the on-disk databases used for safebrowsing. There are
62 // four databases: browse, download, download whitelist and
63 // client-side detection (csd) whitelist databases. The browse database contains
64 // information about phishing and malware urls. The download database contains
65 // URLs for bad binaries (e.g. those containing viruses) and hashes of
66 // these downloaded contents. The download whitelist contains whitelisted
67 // download hosting sites as well as whitelisted binary signing certificates
68 // etc. The csd whitelist database contains URLs that will never be considered
69 // as phishing by the client-side phishing detection. These on-disk databases
70 // are shared among all profiles, as they don't contain user-specific data. This
71 // object is not thread-safe, i.e. all its methods should be used on the same
72 // thread that it was created on.
73 class SafeBrowsingDatabase {
74 public:
75 // Factory method for obtaining a SafeBrowsingDatabase implementation.
76 // It is not thread safe.
77 // |enable_download_protection| is used to control the download database
78 // feature.
79 // |enable_client_side_whitelist| is used to control the csd whitelist
80 // database feature.
81 // |enable_download_whitelist| is used to control the download whitelist
82 // database feature.
83 // |enable_ip_blacklist| is used to control the csd malware IP blacklist
84 // database feature.
85 static SafeBrowsingDatabase* Create(bool enable_download_protection,
86 bool enable_client_side_whitelist,
87 bool enable_download_whitelist,
88 bool enable_extension_blacklist,
89 bool side_effect_free_whitelist,
90 bool enable_ip_blacklist);
92 // Makes the passed |factory| the factory used to instantiate
93 // a SafeBrowsingDatabase. This is used for tests.
94 static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) {
95 factory_ = factory;
98 virtual ~SafeBrowsingDatabase();
100 // Initializes the database with the given filename.
101 virtual void Init(const base::FilePath& filename) = 0;
103 // Deletes the current database and creates a new one.
104 virtual bool ResetDatabase() = 0;
106 // Returns false if |url| is not in the browse database. If it returns true,
107 // then |prefix_hits| contains the list of prefix matches, and |cache_hits|
108 // contains the cached gethash results for those prefixes (if any). This
109 // function is safe to call from threads other than the creation thread.
110 virtual bool ContainsBrowseUrl(
111 const GURL& url,
112 std::vector<SBPrefix>* prefix_hits,
113 std::vector<SBFullHashResult>* cache_hits) = 0;
115 // Returns false if none of |urls| are in Download database. If it returns
116 // true, |prefix_hits| should contain the prefixes for the URLs that were in
117 // the database. This function can ONLY be accessed from the creation thread.
118 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
119 std::vector<SBPrefix>* prefix_hits) = 0;
121 // Returns false if |url| is not on the client-side phishing detection
122 // whitelist. Otherwise, this function returns true. Note: the whitelist
123 // only contains full-length hashes so we don't return any prefix hit.
124 // This function should only be called from the IO thread.
125 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0;
127 // The download whitelist is used for two purposes: a white-domain list of
128 // sites that are considered to host only harmless binaries as well as a
129 // whitelist of arbitrary strings such as hashed certificate authorities that
130 // are considered to be trusted. The two methods below let you lookup
131 // the whitelist either for a URL or an arbitrary string. These methods will
132 // return false if no match is found and true otherwise.
133 // This function could ONLY be accessed from the IO thread.
134 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) = 0;
135 virtual bool ContainsDownloadWhitelistedString(const std::string& str) = 0;
137 // Populates |prefix_hits| with any prefixes in |prefixes| that have matches
138 // in the database.
140 // This function can ONLY be accessed from the creation thread.
141 virtual bool ContainsExtensionPrefixes(
142 const std::vector<SBPrefix>& prefixes,
143 std::vector<SBPrefix>* prefix_hits) = 0;
145 // Returns false unless the hash of |url| is on the side-effect free
146 // whitelist.
147 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) = 0;
149 // Returns true iff the given IP is currently on the csd malware IP blacklist.
150 virtual bool ContainsMalwareIP(const std::string& ip_address) = 0;
152 // A database transaction should look like:
154 // std::vector<SBListChunkRanges> lists;
155 // if (db.UpdateStarted(&lists)) {
156 // // Do something with |lists|.
158 // // Process add/sub commands.
159 // db.InsertChunks(list_name, chunks);
161 // // Process adddel/subdel commands.
162 // db.DeleteChunks(chunks_deletes);
164 // // If passed true, processes the collected chunk info and
165 // // rebuilds the filter. If passed false, rolls everything
166 // // back.
167 // db.UpdateFinished(success);
168 // }
170 // If UpdateStarted() returns true, the caller MUST eventually call
171 // UpdateFinished(). If it returns false, the caller MUST NOT call
172 // the other functions.
173 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0;
174 virtual void InsertChunks(const std::string& list_name,
175 const SBChunkList& chunks) = 0;
176 virtual void DeleteChunks(
177 const std::vector<SBChunkDelete>& chunk_deletes) = 0;
178 virtual void UpdateFinished(bool update_succeeded) = 0;
180 // Store the results of a GetHash response. In the case of empty results, we
181 // cache the prefixes until the next update so that we don't have to issue
182 // further GetHash requests we know will be empty.
183 virtual void CacheHashResults(
184 const std::vector<SBPrefix>& prefixes,
185 const std::vector<SBFullHashResult>& full_hits,
186 const base::TimeDelta& cache_lifetime) = 0;
188 // Returns true if the malware IP blacklisting killswitch URL is present
189 // in the csd whitelist.
190 virtual bool IsMalwareIPMatchKillSwitchOn() = 0;
192 // Returns true if the whitelist killswitch URL is present in the csd
193 // whitelist.
194 virtual bool IsCsdWhitelistKillSwitchOn() = 0;
196 // The name of the bloom-filter file for the given database file.
197 // NOTE(shess): OBSOLETE. Present for deleting stale files.
198 static base::FilePath BloomFilterForFilename(
199 const base::FilePath& db_filename);
201 // The name of the prefix set file for the given database file.
202 static base::FilePath PrefixSetForFilename(const base::FilePath& db_filename);
204 // Filename for malware and phishing URL database.
205 static base::FilePath BrowseDBFilename(
206 const base::FilePath& db_base_filename);
208 // Filename for download URL and download binary hash database.
209 static base::FilePath DownloadDBFilename(
210 const base::FilePath& db_base_filename);
212 // Filename for client-side phishing detection whitelist database.
213 static base::FilePath CsdWhitelistDBFilename(
214 const base::FilePath& csd_whitelist_base_filename);
216 // Filename for download whitelist database.
217 static base::FilePath DownloadWhitelistDBFilename(
218 const base::FilePath& download_whitelist_base_filename);
220 // Filename for extension blacklist database.
221 static base::FilePath ExtensionBlacklistDBFilename(
222 const base::FilePath& extension_blacklist_base_filename);
224 // Filename for side-effect free whitelist database.
225 static base::FilePath SideEffectFreeWhitelistDBFilename(
226 const base::FilePath& side_effect_free_whitelist_base_filename);
228 // Filename for the csd malware IP blacklist database.
229 static base::FilePath IpBlacklistDBFilename(
230 const base::FilePath& ip_blacklist_base_filename);
232 // Enumerate failures for histogramming purposes. DO NOT CHANGE THE
233 // ORDERING OF THESE VALUES.
234 enum FailureType {
235 FAILURE_DATABASE_CORRUPT,
236 FAILURE_DATABASE_CORRUPT_HANDLER,
237 FAILURE_BROWSE_DATABASE_UPDATE_BEGIN,
238 FAILURE_BROWSE_DATABASE_UPDATE_FINISH,
239 FAILURE_DATABASE_FILTER_MISSING_OBSOLETE,
240 FAILURE_DATABASE_FILTER_READ_OBSOLETE,
241 FAILURE_DATABASE_FILTER_WRITE_OBSOLETE,
242 FAILURE_DATABASE_FILTER_DELETE,
243 FAILURE_DATABASE_STORE_MISSING,
244 FAILURE_DATABASE_STORE_DELETE,
245 FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN,
246 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH,
247 FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN,
248 FAILURE_WHITELIST_DATABASE_UPDATE_FINISH,
249 FAILURE_BROWSE_PREFIX_SET_MISSING,
250 FAILURE_BROWSE_PREFIX_SET_READ,
251 FAILURE_BROWSE_PREFIX_SET_WRITE,
252 FAILURE_BROWSE_PREFIX_SET_DELETE,
253 FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN,
254 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH,
255 FAILURE_EXTENSION_BLACKLIST_DELETE,
256 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN,
257 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH,
258 FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE,
259 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ,
260 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE,
261 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE,
262 FAILURE_IP_BLACKLIST_UPDATE_BEGIN,
263 FAILURE_IP_BLACKLIST_UPDATE_FINISH,
264 FAILURE_IP_BLACKLIST_UPDATE_INVALID,
265 FAILURE_IP_BLACKLIST_DELETE,
267 // Memory space for histograms is determined by the max. ALWAYS
268 // ADD NEW VALUES BEFORE THIS ONE.
269 FAILURE_DATABASE_MAX
272 static void RecordFailure(FailureType failure_type);
274 private:
275 // The factory used to instantiate a SafeBrowsingDatabase object.
276 // Useful for tests, so they can provide their own implementation of
277 // SafeBrowsingDatabase.
278 static SafeBrowsingDatabaseFactory* factory_;
281 class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase {
282 public:
283 // Create a database with a browse, download, download whitelist and
284 // csd whitelist store objects. Takes ownership of all the store objects.
285 // When |download_store| is NULL, the database will ignore any operations
286 // related to downloads (url hashes and binary hashes). The same is true for
287 // the |csd_whitelist_store|, |download_whitelist_store| and
288 // |ip_blacklist_store|.
289 SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store,
290 SafeBrowsingStore* download_store,
291 SafeBrowsingStore* csd_whitelist_store,
292 SafeBrowsingStore* download_whitelist_store,
293 SafeBrowsingStore* extension_blacklist_store,
294 SafeBrowsingStore* side_effect_free_whitelist_store,
295 SafeBrowsingStore* ip_blacklist_store);
297 // Create a database with a browse store. This is a legacy interface that
298 // uses Sqlite.
299 SafeBrowsingDatabaseNew();
301 virtual ~SafeBrowsingDatabaseNew();
303 // Implement SafeBrowsingDatabase interface.
304 virtual void Init(const base::FilePath& filename) OVERRIDE;
305 virtual bool ResetDatabase() OVERRIDE;
306 virtual bool ContainsBrowseUrl(
307 const GURL& url,
308 std::vector<SBPrefix>* prefix_hits,
309 std::vector<SBFullHashResult>* cache_hits) OVERRIDE;
310 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
311 std::vector<SBPrefix>* prefix_hits) OVERRIDE;
312 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) OVERRIDE;
313 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) OVERRIDE;
314 virtual bool ContainsDownloadWhitelistedString(
315 const std::string& str) OVERRIDE;
316 virtual bool ContainsExtensionPrefixes(
317 const std::vector<SBPrefix>& prefixes,
318 std::vector<SBPrefix>* prefix_hits) OVERRIDE;
319 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) OVERRIDE;
320 virtual bool ContainsMalwareIP(const std::string& ip_address) OVERRIDE;
321 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) OVERRIDE;
322 virtual void InsertChunks(const std::string& list_name,
323 const SBChunkList& chunks) OVERRIDE;
324 virtual void DeleteChunks(
325 const std::vector<SBChunkDelete>& chunk_deletes) OVERRIDE;
326 virtual void UpdateFinished(bool update_succeeded) OVERRIDE;
327 virtual void CacheHashResults(
328 const std::vector<SBPrefix>& prefixes,
329 const std::vector<SBFullHashResult>& full_hits,
330 const base::TimeDelta& cache_lifetime) OVERRIDE;
332 // Returns the value of malware_kill_switch_;
333 virtual bool IsMalwareIPMatchKillSwitchOn() OVERRIDE;
335 // Returns true if the CSD whitelist has everything whitelisted.
336 virtual bool IsCsdWhitelistKillSwitchOn() OVERRIDE;
338 private:
339 friend class SafeBrowsingDatabaseTest;
340 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, HashCaching);
342 // A SafeBrowsing whitelist contains a list of whitelisted full-hashes (stored
343 // in a sorted vector) as well as a boolean flag indicating whether all
344 // lookups in the whitelist should be considered matches for safety.
345 typedef std::pair<std::vector<SBFullHash>, bool> SBWhitelist;
347 // This map holds a csd malware IP blacklist which maps a prefix mask
348 // to a set of hashed blacklisted IP prefixes. Each IP prefix is a hashed
349 // IPv6 IP prefix using SHA-1.
350 typedef std::map<std::string, base::hash_set<std::string> > IPBlacklist;
352 // Returns true if the whitelist is disabled or if any of the given hashes
353 // matches the whitelist.
354 bool ContainsWhitelistedHashes(const SBWhitelist& whitelist,
355 const std::vector<SBFullHash>& hashes);
357 // Return the browse_store_, download_store_, download_whitelist_store_ or
358 // csd_whitelist_store_ based on list_id.
359 SafeBrowsingStore* GetStore(int list_id);
361 // Deletes the files on disk.
362 bool Delete();
364 // Load the prefix set off disk, if available.
365 void LoadPrefixSet();
367 // Writes the current prefix set to disk.
368 void WritePrefixSet();
370 // Loads the given full-length hashes to the given whitelist. If the number
371 // of hashes is too large or if the kill switch URL is on the whitelist
372 // we will whitelist everything.
373 void LoadWhitelist(const std::vector<SBAddFullHash>& full_hashes,
374 SBWhitelist* whitelist);
376 // Call this method if an error occurred with the given whitelist. This will
377 // result in all lookups to the whitelist returning true.
378 void WhitelistEverything(SBWhitelist* whitelist);
380 // Parses the IP blacklist from the given full-length hashes.
381 void LoadIpBlacklist(const std::vector<SBAddFullHash>& full_hashes);
383 // Helpers for handling database corruption.
384 // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets
385 // |corruption_detected_|, |HandleCorruptDatabase()| posts
386 // |OnHandleCorruptDatabase()| to the current thread, to be run
387 // after the current task completes.
388 // TODO(shess): Wire things up to entirely abort the update
389 // transaction when this happens.
390 void HandleCorruptDatabase();
391 void OnHandleCorruptDatabase();
393 // Helpers for InsertChunks().
394 void InsertAdd(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
395 void InsertAddChunks(safe_browsing_util::ListType list_id,
396 const SBChunkList& chunks);
397 void InsertSub(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
398 void InsertSubChunks(safe_browsing_util::ListType list_id,
399 const SBChunkList& chunks);
401 // Returns the size in bytes of the store after the update.
402 int64 UpdateHashPrefixStore(const base::FilePath& store_filename,
403 SafeBrowsingStore* store,
404 FailureType failure_type);
405 void UpdateBrowseStore();
406 void UpdateSideEffectFreeWhitelistStore();
407 void UpdateWhitelistStore(const base::FilePath& store_filename,
408 SafeBrowsingStore* store,
409 SBWhitelist* whitelist);
410 void UpdateIpBlacklistStore();
412 // Used to verify that various calls are made from the thread the
413 // object was created on.
414 base::MessageLoop* creation_loop_;
416 // Lock for protecting access to variables that may be used on the
417 // IO thread. This includes |prefix_set_|, |cached_browse_hashes_|,
418 // |prefix_miss_cache_|, |csd_whitelist_|.
419 base::Lock lookup_lock_;
421 // Underlying persistent store for chunk data.
422 // For browsing related (phishing and malware URLs) chunks and prefixes.
423 base::FilePath browse_filename_;
424 scoped_ptr<SafeBrowsingStore> browse_store_;
426 // For download related (download URL and binary hash) chunks and prefixes.
427 base::FilePath download_filename_;
428 scoped_ptr<SafeBrowsingStore> download_store_;
430 // For the client-side phishing detection whitelist chunks and full-length
431 // hashes. This list only contains 256 bit hashes.
432 base::FilePath csd_whitelist_filename_;
433 scoped_ptr<SafeBrowsingStore> csd_whitelist_store_;
435 // For the download whitelist chunks and full-length hashes. This list only
436 // contains 256 bit hashes.
437 base::FilePath download_whitelist_filename_;
438 scoped_ptr<SafeBrowsingStore> download_whitelist_store_;
440 // For extension IDs.
441 base::FilePath extension_blacklist_filename_;
442 scoped_ptr<SafeBrowsingStore> extension_blacklist_store_;
444 // For side-effect free whitelist.
445 base::FilePath side_effect_free_whitelist_filename_;
446 scoped_ptr<SafeBrowsingStore> side_effect_free_whitelist_store_;
448 // For IP blacklist.
449 base::FilePath ip_blacklist_filename_;
450 scoped_ptr<SafeBrowsingStore> ip_blacklist_store_;
452 SBWhitelist csd_whitelist_;
453 SBWhitelist download_whitelist_;
454 SBWhitelist extension_blacklist_;
456 // The IP blacklist should be small. At most a couple hundred IPs.
457 IPBlacklist ip_blacklist_;
459 // Store items from CacheHashResults(), ordered by hash for efficient
460 // scanning. Discarded on next update.
461 std::vector<SBFullHashCached> cached_browse_hashes_;
463 // Cache of prefixes that returned empty results (no full hash
464 // match) to |CacheHashResults()|. Cached to prevent asking for
465 // them every time. Cleared on next update.
466 std::set<SBPrefix> prefix_miss_cache_;
468 // Used to schedule resetting the database because of corruption.
469 base::WeakPtrFactory<SafeBrowsingDatabaseNew> reset_factory_;
471 // Set if corruption is detected during the course of an update.
472 // Causes the update functions to fail with no side effects, until
473 // the next call to |UpdateStarted()|.
474 bool corruption_detected_;
476 // Set to true if any chunks are added or deleted during an update.
477 // Used to optimize away database update.
478 bool change_detected_;
480 // Used to check if a prefix was in the browse database.
481 base::FilePath browse_prefix_set_filename_;
482 scoped_ptr<safe_browsing::PrefixSet> browse_prefix_set_;
484 // Used to check if a prefix was in the side-effect free whitelist database.
485 base::FilePath side_effect_free_whitelist_prefix_set_filename_;
486 scoped_ptr<safe_browsing::PrefixSet> side_effect_free_whitelist_prefix_set_;
489 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_