chrome/browser/safe_browsing/client_side_detection_service.h

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4 //
   5 // Helper class which handles communication with the SafeBrowsing backends for
   6 // client-side phishing detection.  This class is used to fetch the client-side
   7 // model and send it to all renderers.  This class is also used to send a ping
   8 // back to Google to verify if a particular site is really phishing or not.
   9 //
  10 // This class is not thread-safe and expects all calls to be made on the UI
  11 // thread.  We also expect that the calling thread runs a message loop.
  12
  13 #ifndef CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
  14 #define CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
  15
  16 #include <map>
  17 #include <queue>
  18 #include <set>
  19 #include <string>
  20 #include <utility>
  21 #include <vector>
  22
  23 #include "base/basictypes.h"
  24 #include "base/callback_forward.h"
  25 #include "base/gtest_prod_util.h"
  26 #include "base/memory/linked_ptr.h"
  27 #include "base/memory/ref_counted.h"
  28 #include "base/memory/scoped_ptr.h"
  29 #include "base/memory/weak_ptr.h"
  30 #include "base/time/time.h"
  31 #include "chrome/browser/safe_browsing/client_side_model_loader.h"
  32 #include "content/public/browser/browser_thread.h"
  33 #include "content/public/browser/notification_observer.h"
  34 #include "content/public/browser/notification_registrar.h"
  35 #include "net/base/net_util.h"
  36 #include "net/url_request/url_fetcher_delegate.h"
  37 #include "url/gurl.h"
  38
  39 class SafeBrowsingService;  // NOTE(review): not referenced by any declaration in this header.
  40
  41 namespace base {
  42 class TimeDelta;  // NOTE(review): not referenced by any declaration in this header.
  43 }
  44
  45 namespace content {
  46 class RenderProcessHost;  // Taken by pointer in SendModelToProcess().
  47 }
  48
  49 namespace net {
  50 class URLFetcher;  // Used by pointer (fetch-complete callback, report map keys).
  51 class URLRequestContextGetter;  // Passed by pointer to Create() and the constructor.
  52 class URLRequestStatus;  // Passed by const reference to the verdict handlers.
  53 typedef std::vector<std::string> ResponseCookies;  // Type of the handlers' |cookies| argument.
  54 }  // namespace net
  55
  56 namespace safe_browsing {
  57 class ClientMalwareRequest;  // Taken by pointer by the malware report methods.
  58 class ClientPhishingRequest;  // Taken by pointer by the phishing report methods.
  59 class ClientPhishingResponse;  // NOTE(review): not referenced by any declaration in this header.
  60 class ClientSideModel;  // NOTE(review): not referenced by any declaration in this header.
  61
  62 // Main service which pushes models to the renderers and responds to
  63 // classification requests. This owns two ModelLoader objects.
  64 class ClientSideDetectionService : public net::URLFetcherDelegate,
  65                                    public content::NotificationObserver {
  66  public:
  67   // void(GURL phishing_url, bool is_phishing).
  68   typedef base::Callback<void(GURL, bool)> ClientReportPhishingRequestCallback;
  69   // void(GURL original_url, GURL malware_url, bool is_malware).
  70   typedef base::Callback<void(GURL, GURL, bool)>
  71       ClientReportMalwareRequestCallback;
  72
  73   ~ClientSideDetectionService() override;
  74
  75   // Creates a client-side detection service.  The service is initially
  76   // disabled, use SetEnabledAndRefreshState() to start it.  The caller takes
  77   // ownership of the object.  This function may return NULL.
  78   static ClientSideDetectionService* Create(
  79       net::URLRequestContextGetter* request_context_getter);
  80
  81   // Enables or disables the service, and refreshes the state of all renderers.
  82   // This is usually called by the SafeBrowsingService, which tracks whether
  83   // any profile uses these services at all.  Disabling cancels any pending
  84   // requests; existing ClientSideDetectionHosts will have their callbacks
  85   // called with "false" verdicts.  Enabling starts downloading the model after
  86   // a delay.  In all cases, each render process is updated to match the state
  87   // of the SafeBrowsing preference for that profile.
  88   void SetEnabledAndRefreshState(bool enabled);
  89
  90   bool enabled() const {
  91     DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
  92     return enabled_;
  93   }
  94
  95   // From the net::URLFetcherDelegate interface.
  96   void OnURLFetchComplete(const net::URLFetcher* source) override;
  97
  98   // content::NotificationObserver overrides:
  99   void Observe(int type,
 100                const content::NotificationSource& source,
 101                const content::NotificationDetails& details) override;
 102
 103   // Sends a request to the SafeBrowsing servers with the ClientPhishingRequest.
 104   // The URL scheme of the |url()| in the request should be HTTP.  This method
 105   // takes ownership of the |verdict| as well as the |callback| and calls the
 106   // callback once the result has come back from the server or if an error
 107   // occurs during the fetch.  |is_extended_reporting| should be set based on
 108   // the active profile setting. If the service is disabled or an error occurs
 109   // the phishing verdict will always be false.  The callback is always called
 110   // after SendClientReportPhishingRequest() returns and on the same thread as
 111   // SendClientReportPhishingRequest() was called.  You may set |callback| to
 112   // NULL if you don't care about the server verdict.
 113   virtual void SendClientReportPhishingRequest(
 114       ClientPhishingRequest* verdict,
 115       bool is_extended_reporting,
 116       const ClientReportPhishingRequestCallback& callback);
 117
 118   // Similar to the above, but sends a ClientMalwareRequest instead.
 119   virtual void SendClientReportMalwareRequest(
 120       ClientMalwareRequest* verdict,
 121       const ClientReportMalwareRequestCallback& callback);
 122
 123   // Returns true if the given IP address string falls within a private
 124   // (unroutable) network block.  Pages which are hosted on these IP addresses
 125   // are exempt from client-side phishing detection.  This is called by the
 126   // ClientSideDetectionHost prior to sending the renderer a
 127   // SafeBrowsingMsg_StartPhishingDetection IPC.
 128   //
 129   // ip_address should be a dotted IPv4 address, or an unbracketed IPv6
 130   // address.
 131   virtual bool IsPrivateIPAddress(const std::string& ip_address) const;
 132
 133   // Returns true and sets is_phishing if url is in the cache and valid.
 134   virtual bool GetValidCachedResult(const GURL& url, bool* is_phishing);
 135
 136   // Returns true if the url is in the cache.
 137   virtual bool IsInCache(const GURL& url);
 138
 139   // Returns true if we have sent more than kMaxReportsPerInterval phishing
 140   // reports in the last kReportsIntervalDays days.
 141   virtual bool OverPhishingReportLimit();
 142
 143   // Returns true if we have sent more than kMaxReportsPerInterval malware
 144   // reports in the last kReportsIntervalDays days.
 145   virtual bool OverMalwareReportLimit();
 146
 147   // Sends a model to each renderer.
 148   virtual void SendModelToRenderers();
 149
 150   base::WeakPtr<ClientSideDetectionService> GetWeakPtr();
 151
 152  protected:
 153   // Use Create() method to create an instance of this object.
 154   explicit ClientSideDetectionService(
 155       net::URLRequestContextGetter* request_context_getter);
 156
 157  private:
 158   friend class ClientSideDetectionServiceTest;
 159   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
 160                            SetEnabledAndRefreshState);
 161
 162   // CacheState holds all information necessary to respond to a caller without
 163   // actually making an HTTP request.
 164   struct CacheState {
 165     bool is_phishing;
 166     base::Time timestamp;
 167
 168     CacheState(bool phish, base::Time time);
 169   };
 170   typedef std::map<GURL, linked_ptr<CacheState> > PhishingCache;
 171
 172   static const char kClientReportMalwareUrl[];
 173   static const char kClientReportPhishingUrl[];
 174   static const int kMaxReportsPerInterval;
 175   static const int kInitialClientModelFetchDelayMs;
 176   static const int kReportsIntervalDays;
 177   static const int kNegativeCacheIntervalDays;
 178   static const int kPositiveCacheIntervalMinutes;
 179
 180   // Starts sending the request to the client-side detection frontends.
 181   // This method takes ownership of |verdict| (the only pointer argument).
 182   void StartClientReportPhishingRequest(
 183       ClientPhishingRequest* verdict,
 184       bool is_extended_reporting,
 185       const ClientReportPhishingRequestCallback& callback);
 186
 187   void StartClientReportMalwareRequest(
 188       ClientMalwareRequest* verdict,
 189       const ClientReportMalwareRequestCallback& callback);
 190
 191   // Called by OnURLFetchComplete to handle the server response from
 192   // sending the client-side phishing request.
 193   void HandlePhishingVerdict(const net::URLFetcher* source,
 194                              const GURL& url,
 195                              const net::URLRequestStatus& status,
 196                              int response_code,
 197                              const net::ResponseCookies& cookies,
 198                              const std::string& data);
 199
 200   // Called by OnURLFetchComplete to handle the server response from
 201   // sending the client-side malware request.
 202   void HandleMalwareVerdict(const net::URLFetcher* source,
 203                             const GURL& url,
 204                             const net::URLRequestStatus& status,
 205                             int response_code,
 206                             const net::ResponseCookies& cookies,
 207                             const std::string& data);
 208
 209   // Invalidate cache results which are no longer useful.
 210   void UpdateCache();
 211
 212   // Get the number of malware reports sent over kReportsIntervalDays.
 213   int GetMalwareNumReports();
 214
 215   // Get the number of phishing reports sent over kReportsIntervalDays.
 216   int GetPhishingNumReports();
 217
 218   // Get the number of reports that we have sent over kReportsIntervalDays, and
 219   // trims off the old elements.
 220   int GetNumReports(std::queue<base::Time>* report_times);
 221
 222   // Send the model to the given renderer.
 223   void SendModelToProcess(content::RenderProcessHost* process);
 224
 225   // Returns the URL that will be used for phishing requests.
 226   static GURL GetClientReportUrl(const std::string& report_url);
 227
 228   // Whether the service is running or not.  When the service is not running,
 229   // it won't download the model nor report detected phishing URLs.
 230   bool enabled_;
 231
 232   // We load two models: one for standard Safe Browsing profiles,
 233   // and one for those opted into extended reporting.
 234   scoped_ptr<ModelLoader> model_loader_standard_;
 235   scoped_ptr<ModelLoader> model_loader_extended_;
 236
 237   // Map of client report phishing request to the corresponding callback that
 238   // has to be invoked when the request is done.
 239   struct ClientReportInfo;
 240   std::map<const net::URLFetcher*, ClientReportInfo*>
 241       client_phishing_reports_;
 242   // Map of client malware ip request to the corresponding callback that
 243   // has to be invoked when the request is done.
 244   struct ClientMalwareReportInfo;
 245   std::map<const net::URLFetcher*, ClientMalwareReportInfo*>
 246       client_malware_reports_;
 247
 248   // Cache of completed requests. Used to satisfy requests for the same urls
 249   // as long as the next request falls within our caching window (determined
 250   // by kNegativeCacheIntervalDays and kPositiveCacheIntervalMinutes). The
 251   // size of this cache is limited by kMaxReportsPerDay (presumably
 252   // kMaxReportsPerInterval -- TODO confirm) * ceil(max cache interval in days).
 253   // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
 254   PhishingCache cache_;
 255
 256   // Timestamp of when we sent a phishing request. Used to limit the number
 257   // of phishing requests that we send in a day.
 258   // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
 259   std::queue<base::Time> phishing_report_times_;
 260
 261   // Timestamp of when we sent a malware request. Used to limit the number
 262   // of malware requests that we send in a day.
 263   std::queue<base::Time> malware_report_times_;
 264
 265   // The context we use to issue network requests.
 266   scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
 267
 268   content::NotificationRegistrar registrar_;
 269
 270   // Used to asynchronously call the callbacks for
 271   // SendClientReportPhishingRequest.
 272   base::WeakPtrFactory<ClientSideDetectionService> weak_factory_;
 273
 274   DISALLOW_COPY_AND_ASSIGN(ClientSideDetectionService);
 275 };
 276
 277 }  // namespace safe_browsing
 278
 279 #endif  // CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_