1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // The Safe Browsing service is responsible for downloading anti-phishing and
6 // anti-malware tables and checking urls against them.
8 #ifndef CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
9 #define CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
17 #include "base/callback.h"
18 #include "base/containers/hash_tables.h"
19 #include "base/memory/ref_counted.h"
20 #include "base/memory/scoped_ptr.h"
21 #include "base/synchronization/lock.h"
22 #include "base/time/time.h"
23 #include "chrome/browser/safe_browsing/protocol_manager.h"
24 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
// Forward declarations.
class SafeBrowsingService;
class SafeBrowsingDatabase;
class URLRequestContext;
class URLRequestContextGetter;
// Forward declarations of types that live in the safe_browsing namespace.
namespace safe_browsing {
class ClientSideDetectionService;
class DownloadProtectionService;
}  // namespace safe_browsing
40 // Construction needs to happen on the main thread.
41 class SafeBrowsingDatabaseManager
42 : public base::RefCountedThreadSafe
<SafeBrowsingDatabaseManager
>,
43 public SafeBrowsingProtocolManagerDelegate
{
47 // Bundle of SafeBrowsing state while performing a URL or hash prefix check.
48 struct SafeBrowsingCheck
{
49 // |check_type| should correspond to the type of item that is being
50 // checked, either a URL or a binary hash/URL. We store this for two
51 // purposes: to know which of Client's methods to call when a result is
52 // known, and for logging purposes. It *isn't* used to predict the response
53 // list type, that is information that the server gives us.
54 SafeBrowsingCheck(const std::vector
<GURL
>& urls
,
55 const std::vector
<SBFullHash
>& full_hashes
,
57 safe_browsing_util::ListType check_type
,
58 const std::vector
<SBThreatType
>& expected_threats
);
61 // Either |urls| or |full_hashes| is used to lookup database. |*_results|
62 // are parallel vectors containing the results. They are initialized to
63 // contain SB_THREAT_TYPE_SAFE.
64 std::vector
<GURL
> urls
;
65 std::vector
<SBThreatType
> url_results
;
66 std::vector
<std::string
> url_metadata
;
67 std::vector
<SBFullHash
> full_hashes
;
68 std::vector
<SBThreatType
> full_hash_results
;
72 base::TimeTicks start
; // When check was sent to SB service.
73 safe_browsing_util::ListType check_type
; // See comment in constructor.
74 std::vector
<SBThreatType
> expected_threats
;
75 std::vector
<SBPrefix
> prefix_hits
;
76 std::vector
<SBFullHashResult
> cache_hits
;
78 // Vends weak pointers for async callbacks on the IO thread, such as
79 // timeout checks and replies from checks performed on the SB task runner.
80 // TODO(lzheng): We should consider to use this time out check
81 // for browsing too (instead of implementing in
82 // safe_browsing_resource_handler.cc).
83 scoped_ptr
<base::WeakPtrFactory
<
84 SafeBrowsingDatabaseManager
> > weak_ptr_factory_
;
87 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingCheck
);
92 void OnSafeBrowsingResult(const SafeBrowsingCheck
& check
);
97 // Called when the result of checking a browse URL is known.
98 virtual void OnCheckBrowseUrlResult(const GURL
& url
,
99 SBThreatType threat_type
,
100 const std::string
& metadata
) {}
102 // Called when the result of checking a download URL is known.
103 virtual void OnCheckDownloadUrlResult(const std::vector
<GURL
>& url_chain
,
104 SBThreatType threat_type
) {}
106 // Called when the result of checking a set of extensions is known.
107 virtual void OnCheckExtensionsResult(
108 const std::set
<std::string
>& threats
) {}
111 // Creates the safe browsing service. Need to initialize before using.
112 explicit SafeBrowsingDatabaseManager(
113 const scoped_refptr
<SafeBrowsingService
>& service
);
115 // Returns true if the url's scheme can be checked.
116 bool CanCheckUrl(const GURL
& url
) const;
118 // Returns whether download protection is enabled.
119 bool download_protection_enabled() const {
120 return enable_download_protection_
;
123 // Called on the IO thread to check if the given url is safe or not. If we
124 // can synchronously determine that the url is safe, CheckUrl returns true.
125 // Otherwise it returns false, and "client" is called asynchronously with the
126 // result when it is ready.
127 virtual bool CheckBrowseUrl(const GURL
& url
, Client
* client
);
129 // Check if the prefix for |url| is in safebrowsing download add lists.
130 // Result will be passed to callback in |client|.
131 virtual bool CheckDownloadUrl(const std::vector
<GURL
>& url_chain
,
134 // Check which prefixes in |extension_ids| are in the safebrowsing blacklist.
135 // Returns true if not, false if further checks need to be made in which case
136 // the result will be passed to |client|.
137 virtual bool CheckExtensionIDs(const std::set
<std::string
>& extension_ids
,
140 // Check if the given url is on the side-effect free whitelist.
141 // Can be called on any thread. Returns false if the check cannot be performed
142 // (e.g. because we are disabled or because of an invalid scheme in the URL).
143 // Otherwise, returns true if the URL is on the whitelist based on matching
144 // the hash prefix only (so there may be false positives).
145 virtual bool CheckSideEffectFreeWhitelistUrl(const GURL
& url
);
147 // Check if the |url| matches any of the full-length hashes from the client-
148 // side phishing detection whitelist. Returns true if there was a match and
149 // false otherwise. To make sure we are conservative we will return true if
150 // an error occurs. This method must be called on the IO thread.
151 virtual bool MatchCsdWhitelistUrl(const GURL
& url
);
153 // Check if the given IP address (either IPv4 or IPv6) matches the malware
155 virtual bool MatchMalwareIP(const std::string
& ip_address
);
157 // Check if the |url| matches any of the full-length hashes from the download
158 // whitelist. Returns true if there was a match and false otherwise. To make
159 // sure we are conservative we will return true if an error occurs. This
160 // method must be called on the IO thread.
161 virtual bool MatchDownloadWhitelistUrl(const GURL
& url
);
163 // Check if |str| matches any of the full-length hashes from the download
164 // whitelist. Returns true if there was a match and false otherwise. To make
165 // sure we are conservative we will return true if an error occurs. This
166 // method must be called on the IO thread.
167 virtual bool MatchDownloadWhitelistString(const std::string
& str
);
169 // Check if the |url| matches any of the full-length hashes from the off-
170 // domain inclusion whitelist. Returns true if there was a match and false
171 // otherwise. To make sure we are conservative, we will return true if an
172 // error occurs. This method must be called on the IO thread.
173 virtual bool MatchInclusionWhitelistUrl(const GURL
& url
);
175 // Check if the CSD malware IP matching kill switch is turned on.
176 virtual bool IsMalwareKillSwitchOn();
178 // Check if the CSD whitelist kill switch is turned on.
179 virtual bool IsCsdWhitelistKillSwitchOn();
181 // Called on the IO thread to cancel a pending check if the result is no
183 void CancelCheck(Client
* client
);
185 // Called on the IO thread when the SafeBrowsingProtocolManager has received
186 // the full hash results for prefix hits detected in the database.
187 void HandleGetHashResults(SafeBrowsingCheck
* check
,
188 const std::vector
<SBFullHashResult
>& full_hashes
,
189 const base::TimeDelta
& cache_lifetime
);
// Initializes the objects that are used on the io_thread. May be called
// multiple times during the life of the DatabaseManager. Must be called on
// the IO thread.
void StartOnIOThread();

// Stops or shuts down operations on the io_thread. May be called multiple
// times during the life of the DatabaseManager. Must be called on the IO
// thread. If |shutdown| is true, the manager is disabled permanently.
void StopOnIOThread(bool shutdown);
202 ~SafeBrowsingDatabaseManager() override
;
204 // protected for tests.
205 void NotifyDatabaseUpdateFinished(bool update_succeeded
);
208 friend class base::RefCountedThreadSafe
<SafeBrowsingDatabaseManager
>;
209 friend class SafeBrowsingServerTest
;
210 friend class SafeBrowsingServiceTest
;
211 friend class SafeBrowsingServiceTestHelper
;
212 friend class SafeBrowsingDatabaseManagerTest
;
213 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseManagerTest
,
214 GetUrlSeverestThreatType
);
215 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseManagerTest
,
216 ServiceStopWithPendingChecks
);
218 typedef std::set
<SafeBrowsingCheck
*> CurrentChecks
;
219 typedef std::vector
<SafeBrowsingCheck
*> GetHashRequestors
;
220 typedef base::hash_map
<SBPrefix
, GetHashRequestors
> GetHashRequests
;
222 // Clients that we've queued up for checking later once the database is ready.
224 QueuedCheck(const safe_browsing_util::ListType check_type
,
227 const std::vector
<SBThreatType
>& expected_threats
,
228 const base::TimeTicks
& start
);
230 safe_browsing_util::ListType check_type
;
233 std::vector
<SBThreatType
> expected_threats
;
234 base::TimeTicks start
; // When check was queued.
237 // Return the threat type of the severest entry in |full_hashes| which matches
238 // |hash|, or SAFE if none match.
239 static SBThreatType
GetHashSeverestThreatType(
240 const SBFullHash
& hash
,
241 const std::vector
<SBFullHashResult
>& full_hashes
);
243 // Given a URL, compare all the possible host + path full hashes to the set of
244 // provided full hashes. Returns the threat type of the severest matching
245 // result from |full_hashes|, or SAFE if none match.
// NOTE(review): this declaration is incomplete as written. The URL parameter
// described above is absent and the parameter list stops on a trailing comma
// with no closing parenthesis. Recover the missing parameters from the
// definition in the .cc file before relying on this header.
246 static SBThreatType
GetUrlSeverestThreatType(
248 const std::vector
<SBFullHashResult
>& full_hashes
,
// Stops operations on the io_thread. May be called multiple times during the
// life of the DatabaseManager. Should be called on the IO thread.
void DoStopOnIOThread();
256 // Returns whether |database_| exists and is accessible.
257 bool DatabaseAvailable() const;
259 // Called on the IO thread. If the database does not exist, queues up a call
260 // on the db thread to create it. Returns whether the database is available.
262 // Note that this is only needed outside the db thread, since functions on the
263 // db thread can call GetDatabase() directly.
264 bool MakeDatabaseAvailable();
266 // Should only be called on db thread as SafeBrowsingDatabase is not
268 SafeBrowsingDatabase
* GetDatabase();
270 // Called on the IO thread with the check result.
271 void OnCheckDone(SafeBrowsingCheck
* info
);
273 // Called on the database thread to retrieve chunks.
274 void GetAllChunksFromDatabase(GetChunksCallback callback
);
276 // Called on the IO thread with the results of all chunks.
277 void OnGetAllChunksFromDatabase(const std::vector
<SBListChunkRanges
>& lists
,
279 GetChunksCallback callback
);
281 // Called on the IO thread after the database reports that it added a chunk.
282 void OnAddChunksComplete(AddChunksCallback callback
);
// Notification that the database is done loading its bloom filter. Checks may
// have been queued while waiting for the database to become ready.
// NOTE(review): the original comment is cut off after "and if so, this" --
// presumably this is where |queued_checks_| gets processed; confirm.
void DatabaseLoadComplete();
289 // Called on the database thread to add/remove chunks and host keys.
290 void AddDatabaseChunks(const std::string
& list
,
291 scoped_ptr
<ScopedVector
<SBChunkData
> > chunks
,
292 AddChunksCallback callback
);
294 void DeleteDatabaseChunks(
295 scoped_ptr
<std::vector
<SBChunkDelete
> > chunk_deletes
);
297 void NotifyClientBlockingComplete(Client
* client
, bool proceed
);
299 void DatabaseUpdateFinished(bool update_succeeded
);
// Runs on the db thread to close the database. See CloseDatabase().
void OnCloseDatabase();

// Runs on the db thread to reset the database. Resetting the database is
// assumed to be a synchronous operation.
void OnResetDatabase();
308 // Internal worker function for processing full hashes.
309 void OnHandleGetHashResults(SafeBrowsingCheck
* check
,
310 const std::vector
<SBFullHashResult
>& full_hashes
);
312 // Run one check against |full_hashes|. Returns |true| if the check
313 // finds a match in |full_hashes|.
314 bool HandleOneCheck(SafeBrowsingCheck
* check
,
315 const std::vector
<SBFullHashResult
>& full_hashes
);
317 // Invoked by CheckDownloadUrl. It checks the download URL on
318 // |safe_browsing_task_runner_|.
319 std::vector
<SBPrefix
> CheckDownloadUrlOnSBThread(
320 const std::vector
<SBPrefix
>& prefixes
);
322 // The callback function when a safebrowsing check is timed out. Client will
323 // be notified that the safebrowsing check is SAFE when this happens.
324 void TimeoutCallback(SafeBrowsingCheck
* check
);
326 // Calls the Client's callback on IO thread after CheckDownloadUrl finishes.
327 void OnAsyncCheckDone(SafeBrowsingCheck
* check
,
328 const std::vector
<SBPrefix
>& prefix_hits
);
330 // Checks all extension ID hashes on |safe_browsing_task_runner_|.
331 std::vector
<SBPrefix
> CheckExtensionIDsOnSBThread(
332 const std::vector
<SBPrefix
>& prefixes
);
334 // Helper function that calls safe browsing client and cleans up |checks_|.
335 void SafeBrowsingCheckDone(SafeBrowsingCheck
* check
);
337 // Helper function to set |check| with default values and start a safe
338 // browsing check with timeout of |timeout|. |task| will be called on
339 // success, otherwise TimeoutCallback will be called.
340 void StartSafeBrowsingCheck(
341 SafeBrowsingCheck
* check
,
342 const base::Callback
<std::vector
<SBPrefix
>(void)>& task
);
344 // SafeBrowsingProtocolManageDelegate override
345 void ResetDatabase() override
;
346 void UpdateStarted() override
;
347 void UpdateFinished(bool success
) override
;
348 void GetChunks(GetChunksCallback callback
) override
;
349 void AddChunks(const std::string
& list
,
350 scoped_ptr
<ScopedVector
<SBChunkData
>> chunks
,
351 AddChunksCallback callback
) override
;
353 scoped_ptr
<std::vector
<SBChunkDelete
>> chunk_deletes
) override
;
355 scoped_refptr
<SafeBrowsingService
> sb_service_
;
357 CurrentChecks checks_
;
359 // Used for issuing only one GetHash request for a given prefix.
360 GetHashRequests gethash_requests_
;
362 // The persistent database. We don't use a scoped_ptr because it
363 // needs to be destroyed on a different thread than this object.
364 SafeBrowsingDatabase
* database_
;
366 // Lock used to prevent possible data races due to compiler optimizations.
367 mutable base::Lock database_lock_
;
// Whether the service is running. 'enabled_' is used by the
// SafeBrowsingDatabaseManager on the IO thread during normal operations.
bool enabled_;

// Indicate if download_protection is enabled by command switch
// so we allow this feature to be exercised.
bool enable_download_protection_;

// Indicate if client-side phishing detection whitelist should be enabled
// or not.
bool enable_csd_whitelist_;

// Indicate if the download whitelist should be enabled or not.
bool enable_download_whitelist_;

// Indicate if the extension blacklist should be enabled.
bool enable_extension_blacklist_;

// Indicate if the side effect free whitelist should be enabled.
bool enable_side_effect_free_whitelist_;

// Indicate if the csd malware IP blacklist should be enabled.
bool enable_ip_blacklist_;

// Indicate if the unwanted software blacklist should be enabled.
bool enable_unwanted_software_blacklist_;
396 // The sequenced task runner for running safe browsing database operations.
397 scoped_refptr
<base::SequencedTaskRunner
> safe_browsing_task_runner_
;
399 // Indicates if we're currently in an update cycle.
400 bool update_in_progress_
;
402 // When true, newly fetched chunks may not in the database yet since the
403 // database is still updating.
404 bool database_update_in_progress_
;
406 // Indicates if we're in the midst of trying to close the database. If this
407 // is true, nothing on the IO thread should access the database.
408 bool closing_database_
;
410 std::deque
<QueuedCheck
> queued_checks_
;
412 // Timeout to use for safe browsing checks.
413 base::TimeDelta check_timeout_
;
415 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseManager
);
418 #endif // CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_