1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // The Safe Browsing service is responsible for downloading anti-phishing and
6 // anti-malware tables and checking urls against them.
8 #ifndef CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
9 #define CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
17 #include "base/callback.h"
18 #include "base/containers/hash_tables.h"
19 #include "base/memory/ref_counted.h"
20 #include "base/memory/scoped_ptr.h"
21 #include "base/synchronization/lock.h"
22 #include "base/time/time.h"
23 #include "chrome/browser/safe_browsing/protocol_manager.h"
24 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
27 class SafeBrowsingService
;
28 class SafeBrowsingDatabase
;
35 class URLRequestContext
;
36 class URLRequestContextGetter
;
39 namespace safe_browsing
{
40 class ClientSideDetectionService
;
41 class DownloadProtectionService
;
44 // Construction needs to happen on the main thread.
45 class SafeBrowsingDatabaseManager
46 : public base::RefCountedThreadSafe
<SafeBrowsingDatabaseManager
>,
47 public SafeBrowsingProtocolManagerDelegate
{
51 // Bundle of SafeBrowsing state while performing a URL or hash prefix check.
52 struct SafeBrowsingCheck
{
53 // |check_type| should correspond to the type of item that is being
54 // checked, either a URL or a binary hash/URL. We store this for two
55 // purposes: to know which of Client's methods to call when a result is
56 // known, and for logging purposes. It *isn't* used to predict the response
57 // list type, that is information that the server gives us.
58 SafeBrowsingCheck(const std::vector
<GURL
>& urls
,
59 const std::vector
<SBFullHash
>& full_hashes
,
61 safe_browsing_util::ListType check_type
,
62 const std::vector
<SBThreatType
>& expected_threats
);
65 // Either |urls| or |full_hashes| is used to look up the database. |*_results|
66 // are parallel vectors containing the results. They are initialized to
67 // contain SB_THREAT_TYPE_SAFE.
68 std::vector
<GURL
> urls
;
69 std::vector
<SBThreatType
> url_results
;
70 std::vector
<SBFullHash
> full_hashes
;
71 std::vector
<SBThreatType
> full_hash_results
;
75 base::TimeTicks start
; // When check was sent to SB service.
76 safe_browsing_util::ListType check_type
; // See comment in constructor.
77 std::vector
<SBThreatType
> expected_threats
;
78 std::vector
<SBPrefix
> prefix_hits
;
79 std::vector
<SBFullHashResult
> full_hits
;
81 // Vends weak pointers for TimeoutCallback(). If the response is
82 // received before the timeout fires, factory is destructed and
83 // the timeout won't be fired.
84 // TODO(lzheng): We should consider using this timeout check
85 // for browsing too (instead of implementing it in
86 // safe_browsing_resource_handler.cc).
87 scoped_ptr
<base::WeakPtrFactory
<
88 SafeBrowsingDatabaseManager
> > timeout_factory_
;
91 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingCheck
);
// Delivers the outcome of a finished |check|.
// NOTE(review): the implementation is not in this header. Per the
// SafeBrowsingCheck constructor comment, |check.check_type| is stored "to know
// which of Client's methods to call when a result is known", so this
// presumably dispatches to one of the OnCheck*Result() callbacks declared
// below -- confirm against the .cc file.
96 void OnSafeBrowsingResult(const SafeBrowsingCheck
& check
);
101 // Called when the result of checking a browse URL is known.
102 virtual void OnCheckBrowseUrlResult(const GURL
& url
,
103 SBThreatType threat_type
) {}
105 // Called when the result of checking a download URL is known.
106 virtual void OnCheckDownloadUrlResult(const std::vector
<GURL
>& url_chain
,
107 SBThreatType threat_type
) {}
109 // Called when the result of checking a download binary hash is known.
110 virtual void OnCheckDownloadHashResult(const std::string
& hash
,
111 SBThreatType threat_type
) {}
113 // Called when the result of checking a set of extensions is known.
114 virtual void OnCheckExtensionsResult(
115 const std::set
<std::string
>& threats
) {}
118 // Creates the safe browsing service. Need to initialize before using.
119 explicit SafeBrowsingDatabaseManager(
120 const scoped_refptr
<SafeBrowsingService
>& service
);
122 // Returns true if the url's scheme can be checked.
123 bool CanCheckUrl(const GURL
& url
) const;
125 // Returns whether download protection is enabled.
126 bool download_protection_enabled() const {
127 return enable_download_protection_
;
130 // Called on the IO thread to check if the given url is safe or not. If we
131 // can synchronously determine that the url is safe, CheckBrowseUrl returns
132 // true. Otherwise it returns false, and "client" is called asynchronously
133 // with the result when it is ready.
134 virtual bool CheckBrowseUrl(const GURL
& url
, Client
* client
);
136 // Check if the prefix for |url| is in safebrowsing download add lists.
137 // Result will be passed to callback in |client|.
138 virtual bool CheckDownloadUrl(const std::vector
<GURL
>& url_chain
,
141 // Check if the prefix for |full_hash| is in safebrowsing binhash add lists.
142 // Result will be passed to callback in |client|.
143 virtual bool CheckDownloadHash(const std::string
& full_hash
, Client
* client
);
145 // Check which prefixes in |extension_ids| are in the safebrowsing blacklist.
146 // Returns true if not, false if further checks need to be made in which case
147 // the result will be passed to |client|.
148 virtual bool CheckExtensionIDs(const std::set
<std::string
>& extension_ids
,
151 // Check if the given url is on the side-effect free whitelist.
152 // Can be called on any thread. Returns false if the check cannot be performed
153 // (e.g. because we are disabled or because of an invalid scheme in the URL).
154 // Otherwise, returns true if the URL is on the whitelist based on matching
155 // the hash prefix only (so there may be false positives).
156 virtual bool CheckSideEffectFreeWhitelistUrl(const GURL
& url
);
158 // Check if the |url| matches any of the full-length hashes from the
159 // client-side phishing detection whitelist. Returns true if there was a
160 // match and false otherwise. To make sure we are conservative we will return
161 // true if an error occurs. This method is expected to be called on the IO thread.
163 virtual bool MatchCsdWhitelistUrl(const GURL
& url
);
165 // Check if the given IP address (either IPv4 or IPv6) matches the malware IP blacklist.
167 virtual bool MatchMalwareIP(const std::string
& ip_address
);
169 // Check if the |url| matches any of the full-length hashes from the
170 // download whitelist. Returns true if there was a match and false otherwise.
171 // To make sure we are conservative we will return true if an error occurs.
172 // This method is expected to be called on the IO thread.
173 virtual bool MatchDownloadWhitelistUrl(const GURL
& url
);
175 // Check if |str| matches any of the full-length hashes from the download
176 // whitelist. Returns true if there was a match and false otherwise.
177 // To make sure we are conservative we will return true if an error occurs.
178 // This method is expected to be called on the IO thread.
179 virtual bool MatchDownloadWhitelistString(const std::string
& str
);
181 // Check if the CSD malware IP matching kill switch is turned on.
182 virtual bool IsMalwareKillSwitchOn();
184 // Called on the IO thread to cancel a pending check if the result is no longer needed.
186 void CancelCheck(Client
* client
);
188 // Called on the IO thread when the SafeBrowsingProtocolManager has received
189 // the full hash results for prefix hits detected in the database.
190 void HandleGetHashResults(
191 SafeBrowsingCheck
* check
,
192 const std::vector
<SBFullHashResult
>& full_hashes
,
195 // Called on the IO thread to release memory.
198 // Log the user perceived delay caused by SafeBrowsing. This delay is the time
199 // delta starting from when we would have started reading data from the
200 // network, and ending when the SafeBrowsing check completes indicating that
201 // the current page is 'safe'.
202 void LogPauseDelay(base::TimeDelta time
);
204 // Called to initialize objects that are used on the io_thread. This may be
205 // called multiple times during the life of the DatabaseManager. Should be
206 // called on IO thread.
207 void StartOnIOThread();
209 // Called to stop or shutdown operations on the io_thread. This may be called
210 // multiple times during the life of the DatabaseManager. Should be called
211 // on IO thread. If shutdown is true, the manager is disabled permanently.
212 void StopOnIOThread(bool shutdown
);
215 virtual ~SafeBrowsingDatabaseManager();
217 // protected for tests.
218 void NotifyDatabaseUpdateFinished(bool update_succeeded
);
221 friend class base::RefCountedThreadSafe
<SafeBrowsingDatabaseManager
>;
222 friend class SafeBrowsingServerTest
;
223 friend class SafeBrowsingServiceTest
;
224 friend class SafeBrowsingServiceTestHelper
;
225 friend class SafeBrowsingDatabaseManagerTest
;
// Set of SafeBrowsingCheck pointers; this is the type of |checks_|.
227 typedef std::set
<SafeBrowsingCheck
*> CurrentChecks
;
// List of SafeBrowsingCheck pointers; used as the mapped type of
// GetHashRequests, i.e. the checks recorded against a single hash prefix.
228 typedef std::vector
<SafeBrowsingCheck
*> GetHashRequestors
;
// Maps an SBPrefix to its GetHashRequestors. This is the type of
// |gethash_requests_|, which is used for issuing only one GetHash request for
// a given prefix.
229 typedef base::hash_map
<SBPrefix
, GetHashRequestors
> GetHashRequests
;
231 // Clients that we've queued up for checking later once the database is ready.
233 QueuedCheck(const safe_browsing_util::ListType check_type
,
236 const std::vector
<SBThreatType
>& expected_threats
,
237 const base::TimeTicks
& start
);
239 safe_browsing_util::ListType check_type
;
242 std::vector
<SBThreatType
> expected_threats
;
243 base::TimeTicks start
; // When check was queued.
246 // Called to stop operations on the io_thread. This may be called multiple
247 // times during the life of the DatabaseManager. Should be called on IO thread.
249 void DoStopOnIOThread();
251 // Returns whether |database_| exists and is accessible.
252 bool DatabaseAvailable() const;
254 // Called on the IO thread. If the database does not exist, queues up a call
255 // on the db thread to create it. Returns whether the database is available.
257 // Note that this is only needed outside the db thread, since functions on the
258 // db thread can call GetDatabase() directly.
259 bool MakeDatabaseAvailable();
261 // Called on the IO thread to try to close the database, freeing the memory
262 // associated with it. The database will be automatically reopened as needed.
264 // NOTE: Actual database closure is asynchronous, and until it happens, the IO
265 // thread is not allowed to access it; may not actually trigger a close if one
266 // is already pending or doing so would cause problems.
267 void CloseDatabase();
269 // Should only be called on db thread as SafeBrowsingDatabase is not thread-safe.
271 SafeBrowsingDatabase
* GetDatabase();
273 // Called on the IO thread with the check result.
274 void OnCheckDone(SafeBrowsingCheck
* info
);
276 // Called on the database thread to retrieve chunks.
277 void GetAllChunksFromDatabase(GetChunksCallback callback
);
279 // Called on the IO thread with the results of all chunks.
280 void OnGetAllChunksFromDatabase(const std::vector
<SBListChunkRanges
>& lists
,
282 GetChunksCallback callback
);
284 // Called on the IO thread after the database reports that it added a chunk.
285 void OnAddChunksComplete(AddChunksCallback callback
);
287 // Notification that the database is done loading its bloom filter. We may
288 // have had to queue checks until the database is ready, and if so, this checks them.
290 void DatabaseLoadComplete();
292 // Called on the database thread to add/remove chunks and host keys.
293 // Callee will free the data when it's done.
294 void AddDatabaseChunks(const std::string
& list
, SBChunkList
* chunks
,
295 AddChunksCallback callback
);
297 void DeleteDatabaseChunks(std::vector
<SBChunkDelete
>* chunk_deletes
);
// Static helper: returns the SBThreatType corresponding to |list_name|.
// NOTE(review): the list-name-to-threat-type mapping is defined outside this
// header -- confirm which list names are recognized and what is returned for
// an unrecognized name.
299 static SBThreatType
GetThreatTypeFromListname(const std::string
& list_name
);
301 void NotifyClientBlockingComplete(Client
* client
, bool proceed
);
303 void DatabaseUpdateFinished(bool update_succeeded
);
305 // Called on the db thread to close the database. See CloseDatabase().
306 void OnCloseDatabase();
308 // Runs on the db thread to reset the database. We assume that resetting the
309 // database is a synchronous operation.
310 void OnResetDatabase();
312 // Store in-memory the GetHash response. Runs on the database thread.
313 void CacheHashResults(const std::vector
<SBPrefix
>& prefixes
,
314 const std::vector
<SBFullHashResult
>& full_hashes
);
316 // Internal worker function for processing full hashes.
317 void OnHandleGetHashResults(SafeBrowsingCheck
* check
,
318 const std::vector
<SBFullHashResult
>& full_hashes
);
320 // Run one check against |full_hashes|. Returns |true| if the check
321 // finds a match in |full_hashes|.
322 bool HandleOneCheck(SafeBrowsingCheck
* check
,
323 const std::vector
<SBFullHashResult
>& full_hashes
);
325 // Checks the download hash on safe_browsing_thread_.
326 void CheckDownloadHashOnSBThread(SafeBrowsingCheck
* check
);
328 // Invoked by CheckDownloadUrl. It checks the download URL on
329 // safe_browsing_thread_.
330 void CheckDownloadUrlOnSBThread(SafeBrowsingCheck
* check
);
332 // The callback function when a safebrowsing check is timed out. Client will
333 // be notified that the safebrowsing check is SAFE when this happens.
334 void TimeoutCallback(SafeBrowsingCheck
* check
);
336 // Calls the Client's callback on IO thread after CheckDownloadUrl finishes.
337 void CheckDownloadUrlDone(SafeBrowsingCheck
* check
);
339 // Calls the Client's callback on IO thread after CheckDownloadHash finishes.
340 void CheckDownloadHashDone(SafeBrowsingCheck
* check
);
342 // Checks all extension ID hashes on safe_browsing_thread_.
343 void CheckExtensionIDsOnSBThread(SafeBrowsingCheck
* check
);
345 // Helper function that calls safe browsing client and cleans up |checks_|.
346 void SafeBrowsingCheckDone(SafeBrowsingCheck
* check
);
348 // Helper function to set |check| with default values and start a safe
349 // browsing check with timeout of |check_timeout_|. |task| will be called on
350 // success, otherwise TimeoutCallback will be called.
351 void StartSafeBrowsingCheck(SafeBrowsingCheck
* check
,
352 const base::Closure
& task
);
354 // SafeBrowsingProtocolManagerDelegate overrides.
355 virtual void ResetDatabase() OVERRIDE
;
356 virtual void UpdateStarted() OVERRIDE
;
357 virtual void UpdateFinished(bool success
) OVERRIDE
;
358 virtual void GetChunks(GetChunksCallback callback
) OVERRIDE
;
359 virtual void AddChunks(const std::string
& list
, SBChunkList
* chunks
,
360 AddChunksCallback callback
) OVERRIDE
;
361 virtual void DeleteChunks(
362 std::vector
<SBChunkDelete
>* delete_chunks
) OVERRIDE
;
364 scoped_refptr
<SafeBrowsingService
> sb_service_
;
366 CurrentChecks checks_
;
368 // Used for issuing only one GetHash request for a given prefix.
369 GetHashRequests gethash_requests_
;
371 // The persistent database. We don't use a scoped_ptr because it
372 // needs to be destroyed on a different thread than this object.
373 SafeBrowsingDatabase
* database_
;
375 // Lock used to prevent possible data races due to compiler optimizations.
376 mutable base::Lock database_lock_
;
378 // Whether the service is running. 'enabled_' is used by the
379 // SafeBrowsingDatabaseManager on the IO thread during normal operations.
382 // Indicate if download_protection is enabled by command switch
383 // so we allow this feature to be exercised.
384 bool enable_download_protection_
;
386 // Indicate if client-side phishing detection whitelist should be enabled or not.
388 bool enable_csd_whitelist_
;
390 // Indicate if the download whitelist should be enabled or not.
391 bool enable_download_whitelist_
;
393 // Indicate if the extension blacklist should be enabled.
394 bool enable_extension_blacklist_
;
396 // Indicate if the side effect free whitelist should be enabled.
397 bool enable_side_effect_free_whitelist_
;
399 // Indicate if the csd malware IP blacklist should be enabled.
400 bool enable_ip_blacklist_
;
402 // The SafeBrowsing thread that runs database operations.
404 // Note: Functions that run on this thread should run synchronously and return
405 // to the IO thread, not post additional tasks back to this thread, lest we
406 // cause a race condition at shutdown time that leads to a database leak.
407 scoped_ptr
<base::Thread
> safe_browsing_thread_
;
409 // Indicates if we're currently in an update cycle.
410 bool update_in_progress_
;
412 // When true, newly fetched chunks may not be in the database yet since the
413 // database is still updating.
414 bool database_update_in_progress_
;
416 // Indicates if we're in the midst of trying to close the database. If this
417 // is true, nothing on the IO thread should access the database.
418 bool closing_database_
;
420 std::deque
<QueuedCheck
> queued_checks_
;
422 // Timeout to use for safe browsing checks.
423 base::TimeDelta check_timeout_
;
425 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseManager
);
428 #endif // CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_