Disable view source for Developer Tools.
[chromium-blink-merge.git] / chrome / browser / safe_browsing / safe_browsing_database.h
blob2c5c7c28e8ac3320b645b157f3e4a706a71947db
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
8 #include <map>
9 #include <set>
10 #include <string>
11 #include <vector>
13 #include "base/containers/hash_tables.h"
14 #include "base/files/file_path.h"
15 #include "base/gtest_prod_util.h"
16 #include "base/memory/scoped_ptr.h"
17 #include "base/memory/weak_ptr.h"
18 #include "base/synchronization/lock.h"
19 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
21 namespace base {
22 class MessageLoop;
23 class Time;
26 namespace safe_browsing {
27 class PrefixSet;
30 class GURL;
31 class SafeBrowsingDatabase;
33 // Abstract factory for SafeBrowsingDatabase objects. Tests implement this
34 // interface to supply fake databases; see SafeBrowsingDatabase::RegisterFactory().
35 class SafeBrowsingDatabaseFactory {
36 public:
37 SafeBrowsingDatabaseFactory() { }
38 virtual ~SafeBrowsingDatabaseFactory() { }
39 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
40 bool enable_download_protection,
41 bool enable_client_side_whitelist,
42 bool enable_download_whitelist,
43 bool enable_extension_blacklist,
44 bool enable_side_effect_free_whitelist,
45 bool enable_ip_blacklist) = 0;  // Same six flags as SafeBrowsingDatabase::Create().
46 private:
47 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory);
51 // Encapsulates the on-disk databases used for safebrowsing: the browse,
52 // download, download whitelist and client-side detection (csd) whitelist
53 // databases, plus the extension blacklist, side-effect free whitelist and csd
54 // malware IP blacklist databases. The browse database contains information
55 // about phishing and malware URLs. The download database contains URLs for bad
56 // binaries (e.g. those containing viruses) and hashes of the downloaded
57 // contents. The download whitelist contains whitelisted download hosting sites
58 // as well as whitelisted binary signing certificates etc. The csd whitelist
59 // contains URLs that will never be considered phishing by the client-side
60 // phishing detection. These databases are shared among all profiles, as they
61 // don't contain user-specific data. This object is not thread-safe: unless a
62 // method's comment says otherwise, use it only on the thread it was created on.
63 class SafeBrowsingDatabase {
64 public:
65 // Factory method for obtaining a SafeBrowsingDatabase implementation.
66 // It is not thread safe. Each flag controls one database feature:
67 // |enable_download_protection|: the download database.
68 // |enable_client_side_whitelist|: the csd whitelist database.
69 // |enable_download_whitelist|: the download whitelist database.
70 // |enable_extension_blacklist|: presumably the extension blacklist database
71 // (TODO: confirm against the implementation).
72 // |side_effect_free_whitelist|: presumably the side-effect free whitelist
73 // database (TODO: confirm against the implementation).
74 // |enable_ip_blacklist|: the csd malware IP blacklist database.
75 static SafeBrowsingDatabase* Create(bool enable_download_protection,
76 bool enable_client_side_whitelist,
77 bool enable_download_whitelist,
78 bool enable_extension_blacklist,
79 bool side_effect_free_whitelist,
80 bool enable_ip_blacklist);
82 // Makes the passed |factory| the factory used to instantiate
83 // a SafeBrowsingDatabase (stored in |factory_|). This is used for tests.
84 static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) {
85 factory_ = factory;
88 virtual ~SafeBrowsingDatabase();
90 // Initializes the database with the given filename.
91 virtual void Init(const base::FilePath& filename) = 0;
93 // Deletes the current database and creates a new one.
94 virtual bool ResetDatabase() = 0;
96 // Returns false if |url| is not in the browse database. If it
97 // returns true, then either |matching_list| is the name of the matching
98 // list, or |prefix_hits| and |full_hits| contain the matching hash
99 // prefixes. This function is safe to call from threads other than
100 // the creation thread.
101 virtual bool ContainsBrowseUrl(const GURL& url,
102 std::string* matching_list,
103 std::vector<SBPrefix>* prefix_hits,
104 std::vector<SBFullHashResult>* full_hits,
105 base::Time last_update) = 0;
107 // Returns false if none of |urls| are in the download database. If it returns
108 // true, |prefix_hits| should contain the prefixes for the URLs that were in
109 // the database. This function may ONLY be called from the creation thread.
110 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
111 std::vector<SBPrefix>* prefix_hits) = 0;
113 // Returns false if |prefix| is not in the download database.
114 // This function may ONLY be called from the creation thread.
115 virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) = 0;
117 // Returns false if |url| is not on the client-side phishing detection
118 // whitelist. Otherwise, this function returns true. Note: the whitelist
119 // only contains full-length hashes so we don't return any prefix hit.
120 // This function should only be called from the IO thread.
121 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0;
123 // The download whitelist is used for two purposes: a white-domain list of
124 // sites that are considered to host only harmless binaries as well as a
125 // whitelist of arbitrary strings such as hashed certificate authorities that
126 // are considered to be trusted. The two methods below let you look up
127 // in the whitelist either a URL or an arbitrary string. These methods will
128 // return false if no match is found and true otherwise.
129 // These methods may ONLY be called from the IO thread.
130 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) = 0;
131 virtual bool ContainsDownloadWhitelistedString(const std::string& str) = 0;
133 // Populates |prefix_hits| with any prefixes in |prefixes| that have matches
134 // in the database (the bool result presumably reports whether any matched;
136 // TODO confirm). This function may ONLY be called from the creation thread.
137 virtual bool ContainsExtensionPrefixes(
138 const std::vector<SBPrefix>& prefixes,
139 std::vector<SBPrefix>* prefix_hits) = 0;
141 // Returns false unless the hash of |url| is on the side-effect free
142 // whitelist.
143 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) = 0;
145 // Returns true iff the given IP is currently on the csd malware IP blacklist.
146 virtual bool ContainsMalwareIP(const std::string& ip_address) = 0;
148 // A database transaction should look like:
150 // std::vector<SBListChunkRanges> lists;
151 // if (db.UpdateStarted(&lists)) {
152 // // Do something with |lists|.
154 // // Process add/sub commands.
155 // db.InsertChunks(list_name, chunks);
157 // // Process adddel/subdel commands.
158 // db.DeleteChunks(chunks_deletes);
160 // // If passed true, processes the collected chunk info and
161 // // rebuilds the filter. If passed false, rolls everything
162 // // back.
163 // db.UpdateFinished(success);
164 // }
166 // If UpdateStarted() returns true, the caller MUST eventually call
167 // UpdateFinished(). If it returns false, the caller MUST NOT call
168 // the other functions.
169 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0;
170 virtual void InsertChunks(const std::string& list_name,
171 const SBChunkList& chunks) = 0;
172 virtual void DeleteChunks(
173 const std::vector<SBChunkDelete>& chunk_deletes) = 0;
174 virtual void UpdateFinished(bool update_succeeded) = 0;
176 // Store the results of a GetHash response. In the case of empty results, we
177 // cache the prefixes until the next update so that we don't have to issue
178 // further GetHash requests we know will be empty.
179 virtual void CacheHashResults(
180 const std::vector<SBPrefix>& prefixes,
181 const std::vector<SBFullHashResult>& full_hits) = 0;
183 // Returns true if the malware IP blacklisting killswitch URL is present
184 // in the csd whitelist.
185 virtual bool IsMalwareIPMatchKillSwitchOn() = 0;
187 // The name of the bloom-filter file for the given database file.
188 // NOTE(shess): OBSOLETE. Present for deleting stale files.
189 static base::FilePath BloomFilterForFilename(
190 const base::FilePath& db_filename);
192 // The name of the prefix set file for the given database file.
193 static base::FilePath PrefixSetForFilename(const base::FilePath& db_filename);
195 // Filename for malware and phishing URL database.
196 static base::FilePath BrowseDBFilename(
197 const base::FilePath& db_base_filename);
199 // Filename for download URL and download binary hash database.
200 static base::FilePath DownloadDBFilename(
201 const base::FilePath& db_base_filename);
203 // Filename for client-side phishing detection whitelist database.
204 static base::FilePath CsdWhitelistDBFilename(
205 const base::FilePath& csd_whitelist_base_filename);
207 // Filename for download whitelist database.
208 static base::FilePath DownloadWhitelistDBFilename(
209 const base::FilePath& download_whitelist_base_filename);
211 // Filename for extension blacklist database.
212 static base::FilePath ExtensionBlacklistDBFilename(
213 const base::FilePath& extension_blacklist_base_filename);
215 // Filename for side-effect free whitelist database.
216 static base::FilePath SideEffectFreeWhitelistDBFilename(
217 const base::FilePath& side_effect_free_whitelist_base_filename);
219 // Filename for the csd malware IP blacklist database.
220 static base::FilePath IpBlacklistDBFilename(
221 const base::FilePath& ip_blacklist_base_filename);
223 // Enumerates failures for histogramming purposes. DO NOT CHANGE THE
224 // ORDERING OF THESE VALUES.
225 enum FailureType {
226 FAILURE_DATABASE_CORRUPT,
227 FAILURE_DATABASE_CORRUPT_HANDLER,
228 FAILURE_BROWSE_DATABASE_UPDATE_BEGIN,
229 FAILURE_BROWSE_DATABASE_UPDATE_FINISH,
230 FAILURE_DATABASE_FILTER_MISSING_OBSOLETE,
231 FAILURE_DATABASE_FILTER_READ_OBSOLETE,
232 FAILURE_DATABASE_FILTER_WRITE_OBSOLETE,
233 FAILURE_DATABASE_FILTER_DELETE,
234 FAILURE_DATABASE_STORE_MISSING,
235 FAILURE_DATABASE_STORE_DELETE,
236 FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN,
237 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH,
238 FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN,
239 FAILURE_WHITELIST_DATABASE_UPDATE_FINISH,
240 FAILURE_BROWSE_PREFIX_SET_MISSING,
241 FAILURE_BROWSE_PREFIX_SET_READ,
242 FAILURE_BROWSE_PREFIX_SET_WRITE,
243 FAILURE_BROWSE_PREFIX_SET_DELETE,
244 FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN,
245 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH,
246 FAILURE_EXTENSION_BLACKLIST_DELETE,
247 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN,
248 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH,
249 FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE,
250 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ,
251 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE,
252 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE,
253 FAILURE_IP_BLACKLIST_UPDATE_BEGIN,
254 FAILURE_IP_BLACKLIST_UPDATE_FINISH,
255 FAILURE_IP_BLACKLIST_UPDATE_INVALID,
256 FAILURE_IP_BLACKLIST_DELETE,
258 // Memory space for histograms is determined by the max. ALWAYS
259 // ADD NEW VALUES BEFORE THIS ONE.
260 FAILURE_DATABASE_MAX
263 static void RecordFailure(FailureType failure_type);  // Presumably logs |failure_type| to a histogram; TODO confirm.
265 private:
266 // The factory used to instantiate a SafeBrowsingDatabase object.
267 // Useful for tests, so they can provide their own implementation of
268 // SafeBrowsingDatabase. Set by RegisterFactory().
269 static SafeBrowsingDatabaseFactory* factory_;
272 class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase {  // SafeBrowsingStore-backed implementation.
273 public:
274 // Creates a database backed by the given store objects (browse, download,
275 // csd whitelist, download whitelist, extension blacklist, side-effect free
276 // whitelist and IP blacklist). Takes ownership of all the store objects.
277 // When |download_store| is NULL, the database will ignore any operations
278 // related to downloads (url hashes and binary hashes). The same is true for
279 // the |csd_whitelist_store|, |download_whitelist_store| and |ip_blacklist_store|.
280 SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store,
281 SafeBrowsingStore* download_store,
282 SafeBrowsingStore* csd_whitelist_store,
283 SafeBrowsingStore* download_whitelist_store,
284 SafeBrowsingStore* extension_blacklist_store,
285 SafeBrowsingStore* side_effect_free_whitelist_store,
286 SafeBrowsingStore* ip_blacklist_store);
288 // Creates a database with a browse store. This is a legacy interface that
289 // uses Sqlite.
290 SafeBrowsingDatabaseNew();
292 virtual ~SafeBrowsingDatabaseNew();
294 // Implement SafeBrowsingDatabase interface.
295 virtual void Init(const base::FilePath& filename) OVERRIDE;
296 virtual bool ResetDatabase() OVERRIDE;
297 virtual bool ContainsBrowseUrl(const GURL& url,
298 std::string* matching_list,
299 std::vector<SBPrefix>* prefix_hits,
300 std::vector<SBFullHashResult>* full_hits,
301 base::Time last_update) OVERRIDE;
302 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
303 std::vector<SBPrefix>* prefix_hits) OVERRIDE;
304 virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) OVERRIDE;
305 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) OVERRIDE;
306 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) OVERRIDE;
307 virtual bool ContainsDownloadWhitelistedString(
308 const std::string& str) OVERRIDE;
309 virtual bool ContainsExtensionPrefixes(
310 const std::vector<SBPrefix>& prefixes,
311 std::vector<SBPrefix>* prefix_hits) OVERRIDE;
312 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) OVERRIDE;
313 virtual bool ContainsMalwareIP(const std::string& ip_address) OVERRIDE;
314 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) OVERRIDE;
315 virtual void InsertChunks(const std::string& list_name,
316 const SBChunkList& chunks) OVERRIDE;
317 virtual void DeleteChunks(
318 const std::vector<SBChunkDelete>& chunk_deletes) OVERRIDE;
319 virtual void UpdateFinished(bool update_succeeded) OVERRIDE;
320 virtual void CacheHashResults(
321 const std::vector<SBPrefix>& prefixes,
322 const std::vector<SBFullHashResult>& full_hits) OVERRIDE;
324 // Implements the base-class kill-switch query. NOTE(review): no |malware_kill_switch_| member is declared in this class.
325 virtual bool IsMalwareIPMatchKillSwitchOn() OVERRIDE;
327 private:
328 friend class SafeBrowsingDatabaseTest;
329 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, HashCaching);
331 // A SafeBrowsing whitelist contains a list of whitelisted full-hashes (stored
332 // in a sorted vector) as well as a boolean flag indicating whether all
333 // lookups in the whitelist should be considered matches for safety.
334 typedef std::pair<std::vector<SBFullHash>, bool> SBWhitelist;
336 // This map holds a csd malware IP blacklist which maps a prefix mask
337 // to a set of hashed blacklisted IP prefixes. Each IP prefix is a hashed
338 // IPv6 IP prefix using SHA-1.
339 typedef std::map<std::string, base::hash_set<std::string> > IPBlacklist;
341 // Returns true if the whitelist is disabled or if any of the given hashes
342 // matches the whitelist.
343 bool ContainsWhitelistedHashes(const SBWhitelist& whitelist,
344 const std::vector<SBFullHash>& hashes);
346 // Returns one of the store members (e.g. browse_store_, download_store_,
347 // download_whitelist_store_ or csd_whitelist_store_) based on list_id.
348 SafeBrowsingStore* GetStore(int list_id);
350 // Deletes the files on disk.
351 bool Delete();
353 // Load the prefix set off disk, if available.
354 void LoadPrefixSet();
356 // Writes the current prefix set to disk.
357 void WritePrefixSet();
359 // Loads the given full-length hashes to the given whitelist. If the number
360 // of hashes is too large or if the kill switch URL is on the whitelist
361 // we will whitelist everything.
362 void LoadWhitelist(const std::vector<SBAddFullHash>& full_hashes,
363 SBWhitelist* whitelist);
365 // Call this method if an error occurred with the given whitelist. This will
366 // cause all lookups in the whitelist to return true.
367 void WhitelistEverything(SBWhitelist* whitelist);
369 // Parses the IP blacklist from the given full-length hashes.
370 void LoadIpBlacklist(const std::vector<SBAddFullHash>& full_hashes);
372 // Helpers for handling database corruption.
373 // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets
374 // |corruption_detected_|, |HandleCorruptDatabase()| posts
375 // |OnHandleCorruptDatabase()| to the current thread, to be run
376 // after the current task completes.
377 // TODO(shess): Wire things up to entirely abort the update
378 // transaction when this happens.
379 void HandleCorruptDatabase();
380 void OnHandleCorruptDatabase();
382 // Helpers for InsertChunks().
383 void InsertAdd(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
384 void InsertAddChunks(safe_browsing_util::ListType list_id,
385 const SBChunkList& chunks);
386 void InsertSub(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
387 void InsertSubChunks(safe_browsing_util::ListType list_id,
388 const SBChunkList& chunks);
390 // Returns the size in bytes of the store after the update.
391 int64 UpdateHashPrefixStore(const base::FilePath& store_filename,
392 SafeBrowsingStore* store,
393 FailureType failure_type);
394 void UpdateBrowseStore();
395 void UpdateSideEffectFreeWhitelistStore();
396 void UpdateWhitelistStore(const base::FilePath& store_filename,
397 SafeBrowsingStore* store,
398 SBWhitelist* whitelist);
399 void UpdateIpBlacklistStore();
401 // Used to verify that various calls are made from the thread the
402 // object was created on.
403 base::MessageLoop* creation_loop_;
405 // Lock for protecting access to variables that may be used on the
406 // IO thread. This includes |prefix_set_| (presumably |browse_prefix_set_|), |full_browse_hashes_|,
407 // |pending_browse_hashes_|, |prefix_miss_cache_|, |csd_whitelist_|.
408 base::Lock lookup_lock_;
410 // Underlying persistent store for chunk data.
411 // For browsing related (phishing and malware URLs) chunks and prefixes.
412 base::FilePath browse_filename_;
413 scoped_ptr<SafeBrowsingStore> browse_store_;
415 // For download related (download URL and binary hash) chunks and prefixes.
416 base::FilePath download_filename_;
417 scoped_ptr<SafeBrowsingStore> download_store_;
419 // For the client-side phishing detection whitelist chunks and full-length
420 // hashes. This list only contains 256 bit hashes.
421 base::FilePath csd_whitelist_filename_;
422 scoped_ptr<SafeBrowsingStore> csd_whitelist_store_;
424 // For the download whitelist chunks and full-length hashes. This list only
425 // contains 256 bit hashes.
426 base::FilePath download_whitelist_filename_;
427 scoped_ptr<SafeBrowsingStore> download_whitelist_store_;
429 // For extension IDs.
430 base::FilePath extension_blacklist_filename_;
431 scoped_ptr<SafeBrowsingStore> extension_blacklist_store_;
433 // For side-effect free whitelist.
434 base::FilePath side_effect_free_whitelist_filename_;
435 scoped_ptr<SafeBrowsingStore> side_effect_free_whitelist_store_;
437 // For IP blacklist.
438 base::FilePath ip_blacklist_filename_;
439 scoped_ptr<SafeBrowsingStore> ip_blacklist_store_;
441 SBWhitelist csd_whitelist_;
442 SBWhitelist download_whitelist_;
443 SBWhitelist extension_blacklist_;  // NOTE(review): a blacklist held in the SBWhitelist type.
445 // The IP blacklist should be small. At most a couple hundred IPs.
446 IPBlacklist ip_blacklist_;
448 // Cached browse store related full-hash items, ordered by prefix for
449 // efficient scanning.
450 // |full_browse_hashes_| are items from |browse_store_|,
451 // |pending_browse_hashes_| are items from |CacheHashResults()|, which
452 // will be pushed to the store on the next update.
453 std::vector<SBAddFullHash> full_browse_hashes_;
454 std::vector<SBAddFullHash> pending_browse_hashes_;
456 // Cache of prefixes that returned empty results (no full hash
457 // match) to |CacheHashResults()|. Cached to prevent asking for
458 // them every time. Cleared on next update.
459 std::set<SBPrefix> prefix_miss_cache_;
461 // Used to schedule resetting the database because of corruption.
462 base::WeakPtrFactory<SafeBrowsingDatabaseNew> reset_factory_;
464 // Set if corruption is detected during the course of an update.
465 // Causes the update functions to fail with no side effects, until
466 // the next call to |UpdateStarted()|.
467 bool corruption_detected_;
469 // Set to true if any chunks are added or deleted during an update.
470 // Used to optimize away database update.
471 bool change_detected_;
473 // Used to check if a prefix was in the browse database.
474 base::FilePath browse_prefix_set_filename_;
475 scoped_ptr<safe_browsing::PrefixSet> browse_prefix_set_;
477 // Presumably used to check if a prefix was in the side-effect free whitelist (judging by the member names; TODO confirm).
478 base::FilePath side_effect_free_whitelist_prefix_set_filename_;
479 scoped_ptr<safe_browsing::PrefixSet> side_effect_free_whitelist_prefix_set_;
482 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_