NaCl: Update revision in DEPS, r12770 -> r12773
[chromium-blink-merge.git] / chrome / browser / safe_browsing / browser_feature_extractor.h
blob82a4e0d38b340e54d83961c4c683aa1b712127e2
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // BrowserFeatureExtractor computes various browser features for client-side
6 // phishing detection. For now it does a bunch of lookups in the history
7 // service to see whether a particular URL has been visited before by the
8 // user.
10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
13 #include <map>
14 #include <set>
15 #include <string>
16 #include <utility>
17 #include <vector>
19 #include "base/basictypes.h"
20 #include "base/callback.h"
21 #include "base/containers/hash_tables.h"
22 #include "base/memory/scoped_ptr.h"
23 #include "base/sequenced_task_runner_helpers.h"
24 #include "base/time/time.h"
25 #include "chrome/browser/common/cancelable_request.h"
26 #include "chrome/browser/history/history_types.h"
27 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
28 #include "chrome/browser/safe_browsing/ui_manager.h"
29 #include "url/gurl.h"
30 #include "webkit/common/resource_type.h"
33 class HistoryService;
35 namespace content {
36 class WebContents;
39 namespace safe_browsing {
40 class ClientMalwareRequest;
41 class ClientPhishingRequest;
42 class ClientSideDetectionHost;
// Describes one URL associated with an IP address. Instances are stored as
// the elements of the per-key vectors in IPUrlMap.
44 struct IPUrlInfo {
45 // The URL on the bad IP address.
46 std::string url;
// NOTE(review): presumably the HTTP request method used for |url| -- confirm.
47 std::string method;
// NOTE(review): presumably the referrer of the request for |url| -- confirm.
48 std::string referrer;
// Resource type (ResourceType::Type) recorded for |url|.
49 ResourceType::Type resource_type;
// Constructor taking one argument per field above; the definition is not in
// this header.
51 IPUrlInfo(const std::string& url,
52 const std::string& method,
53 const std::string& referrer,
54 const ResourceType::Type& resource_type);
55 ~IPUrlInfo();
// Maps a string key to the IPUrlInfo entries recorded for it.
// NOTE(review): the key is presumably the textual IP address (see
// BrowseInfo::ips) -- confirm against the code that fills the map.
58 typedef std::map<std::string, std::vector<IPUrlInfo> > IPUrlMap;
// Information about a navigation that is handed to BrowserFeatureExtractor
// (see ExtractFeatures and ExtractMalwareFeatures).
60 struct BrowseInfo {
61 // IPv4 and IPv6 addresses from which content was requested while browsing
62 // to the |url|, each mapped to the IPUrlInfo entries recorded for it.
63 IPUrlMap ips;
65 // If a SafeBrowsing interstitial was shown for the current URL
66 // this will contain the UnsafeResource struct for that URL.
67 scoped_ptr<SafeBrowsingUIManager::UnsafeResource> unsafe_resource;
69 // List of redirects that lead to the first page on the current host and
70 // the current url respectively. These may be the same if the current url
71 // is the first page on its host.
72 std::vector<GURL> host_redirects;
73 std::vector<GURL> url_redirects;
75 // URL of the referrer of this URL load.
76 GURL referrer;
78 // The HTTP status code from this navigation.
79 int http_status_code;
81 BrowseInfo();
82 ~BrowseInfo();
85 // All methods of this class must be called on the UI thread (including
86 // the constructor).
87 class BrowserFeatureExtractor {
88 public:
89 // Called when feature extraction is done. The first argument will be
90 // true iff feature extraction succeeded. The second argument is the
91 // phishing request which was modified by the feature extractor. The
92 // DoneCallback takes ownership of the request object.
93 typedef base::Callback<void(bool, ClientPhishingRequest*)> DoneCallback;
// Counterpart of DoneCallback for malware feature extraction. The request is
// handed to the callback as a scoped_ptr, so ownership transfers with it.
// NOTE(review): the bool presumably signals success as in DoneCallback --
// confirm against the implementation.
94 typedef base::Callback<void(bool, scoped_ptr<ClientMalwareRequest>)>
95 MalwareDoneCallback;
97 // The caller keeps ownership of the tab and host objects and is
98 // responsible for ensuring that they stay valid for the entire
99 // lifetime of this object.
100 BrowserFeatureExtractor(content::WebContents* tab,
101 ClientSideDetectionHost* host);
103 // The destructor will cancel any pending requests.
104 virtual ~BrowserFeatureExtractor();
106 // Begins extraction of the browser features. We take ownership
107 // of the request object until |callback| is called (see DoneCallback above)
108 // and will write the extracted features to the feature map. Once the
109 // feature extraction is complete, |callback| is run on the UI thread. We
110 // take ownership of the |callback| object. |info| need not remain valid
111 // after ExtractFeatures returns. This method must run on the UI thread.
112 virtual void ExtractFeatures(const BrowseInfo* info,
113 ClientPhishingRequest* request,
114 const DoneCallback& callback);
116 // Begins extraction of the malware related features. We take ownership
117 // of the request object until |callback| is called. Once feature extraction
118 // is complete, |callback| will run on the UI thread. |info| is not expected
119 // to stay valid after ExtractMalwareFeatures returns. All IPs stored in
120 // |info| will be cleared by calling this function.
121 virtual void ExtractMalwareFeatures(BrowseInfo* info,
122 ClientMalwareRequest* request,
123 const MalwareDoneCallback& callback);
125 private:
126 friend class base::DeleteHelper<BrowserFeatureExtractor>;
// A phishing request paired with the callback to run for it; this is the
// value type of PendingQueriesMap, which is keyed by history request handle.
127 typedef std::pair<ClientPhishingRequest*, DoneCallback> ExtractionData;
128 typedef std::map<CancelableRequestProvider::Handle,
129 ExtractionData> PendingQueriesMap;
131 // Synchronous browser feature extraction.
132 void ExtractBrowseInfoFeatures(const BrowseInfo& info,
133 ClientPhishingRequest* request);
135 // Actually starts feature extraction (does the real work).
136 void StartExtractFeatures(ClientPhishingRequest* request,
137 const DoneCallback& callback);
139 // HistoryService callback which is called when we're done querying URL visits
140 // in the history.
141 void QueryUrlHistoryDone(CancelableRequestProvider::Handle handle,
142 bool success,
143 const history::URLRow* row,
144 history::VisitVector* visits);
146 // HistoryService callback which is called when we're done querying HTTP host
147 // visits in the history.
148 void QueryHttpHostVisitsDone(CancelableRequestProvider::Handle handle,
149 bool success,
150 int num_visits,
151 base::Time first_visit);
153 // HistoryService callback which is called when we're done querying HTTPS host
154 // visits in the history.
155 void QueryHttpsHostVisitsDone(CancelableRequestProvider::Handle handle,
156 bool success,
157 int num_visits,
158 base::Time first_visit);
160 // Helper function which sets the host history features given the
161 // number of host visits and the time of the first host visit. Set
162 // |is_http_query| to true if the URL scheme is HTTP and to false if
163 // the scheme is HTTPS.
164 void SetHostVisitsFeatures(int num_visits,
165 base::Time first_visit,
166 bool is_http_query,
167 ClientPhishingRequest* request);
169 // Helper function which stores the request and callback while the history
170 // query is being processed.
171 void StorePendingQuery(CancelableRequestProvider::Handle handle,
172 ClientPhishingRequest* request,
173 const DoneCallback& callback);
175 // Helper function which is the counterpart of StorePendingQuery. If there
176 // is a pending query for the given handle it will return true and set both
177 // the request and callback pointers. Otherwise, it will return false.
178 bool GetPendingQuery(CancelableRequestProvider::Handle handle,
179 ClientPhishingRequest** request,
180 DoneCallback* callback);
182 // Helper function which gets the history service if possible. If the pointer
183 // is set it will return true and false otherwise.
184 bool GetHistoryService(HistoryService** history);
186 // Helper function which is called when we're done filtering out benign IPs
187 // on the IO thread. This function is called on the UI thread.
188 void FinishExtractMalwareFeatures(scoped_ptr<IPUrlMap> bad_ips,
189 MalwareDoneCallback callback,
190 scoped_ptr<ClientMalwareRequest> request);
// Raw, non-owning pointers. NOTE(review): presumably the |tab| and |host|
// passed to the constructor, which the caller owns and must keep valid for
// the lifetime of this object -- confirm in the .cc file.
192 content::WebContents* tab_;
193 ClientSideDetectionHost* host_;
194 CancelableRequestConsumer request_consumer_;
// NOTE(review): base/memory/weak_ptr.h is not in the include list visible in
// this header; presumably it arrives transitively -- confirm.
195 base::WeakPtrFactory<BrowserFeatureExtractor> weak_factory_;
197 // Pending extractions (i.e. extractions for which ExtractFeatures was
198 // called but not StartExtractFeatures), mapping each request to its callback.
199 std::map<ClientPhishingRequest*, DoneCallback> pending_extractions_;
201 // Pending queries (i.e., where history->Query...() was called but the
202 // history callback hasn't been invoked yet), keyed by request handle.
203 PendingQueriesMap pending_queries_;
205 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor);
208 } // namespace safe_browsing
209 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_