Adding instrumentation to locate the source of jankiness
[chromium-blink-merge.git] / chrome / browser / safe_browsing / browser_feature_extractor.h
blobcab0471f5593989293e5ae2760bbfff606036609
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // BrowserFeatureExtractor computes various browser features for client-side
6 // phishing detection. For now it does a bunch of lookups in the history
7 // service to see whether a particular URL has been visited before by the
8 // user.
10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
13 #include <map>
14 #include <set>
15 #include <string>
16 #include <utility>
17 #include <vector>
19 #include "base/basictypes.h"
20 #include "base/callback.h"
21 #include "base/containers/hash_tables.h"
22 #include "base/memory/scoped_ptr.h"
23 #include "base/task/cancelable_task_tracker.h"
24 #include "base/time/time.h"
25 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
26 #include "chrome/browser/safe_browsing/ui_manager.h"
27 #include "components/history/core/browser/history_types.h"
28 #include "content/public/common/resource_type.h"
29 #include "url/gurl.h"
32 class HistoryService;
34 namespace content {
35 class WebContents;
38 namespace safe_browsing {
39 class ClientMalwareRequest;
40 class ClientPhishingRequest;
41 class ClientSideDetectionHost;
// Describes a single URL associated with an IP address. Instances are stored
// as the elements of the vectors held in IPUrlMap.
43 struct IPUrlInfo {
44 // The url on the bad IP address.
45 std::string url;
// NOTE(review): presumably the HTTP method and the referrer of the request
// made for |url| -- confirm against the .cc file.
46 std::string method;
47 std::string referrer;
// Resource type (content::ResourceType) recorded for |url|.
48 content::ResourceType resource_type;
// Constructor parameters are named after the data members above, one per
// member. Both constructor and destructor are only declared here; their
// definitions live outside this header.
50 IPUrlInfo(const std::string& url,
51 const std::string& method,
52 const std::string& referrer,
53 const content::ResourceType& resource_type);
54 ~IPUrlInfo();
// Maps a string key to the list of IPUrlInfo entries recorded for it.
// BrowseInfo::ips uses this type, and its comment describes the keys as
// IPv4 and IPv6 addresses.
57 typedef std::map<std::string, std::vector<IPUrlInfo> > IPUrlMap;
// Information about the page currently being browsed. Pointers to this struct
// are what BrowserFeatureExtractor::ExtractFeatures and
// ExtractMalwareFeatures take as their |info| argument.
59 struct BrowseInfo {
60 // The URL we're currently browsing.
61 GURL url;
63 // List of IPv4 and IPv6 addresses from which content was requested
64 // together with the hosts on it, while browsing to the |url|.
65 IPUrlMap ips;
67 // If a SafeBrowsing interstitial was shown for the current URL
68 // this will contain the UnsafeResource struct for that URL.
// NOTE(review): presumably left null when no interstitial was shown --
// confirm against the code that populates it.
69 scoped_ptr<SafeBrowsingUIManager::UnsafeResource> unsafe_resource;
71 // List of redirects that lead to the first page on the current host and
72 // the current url respectively. These may be the same if the current url
73 // is the first page on its host.
74 std::vector<GURL> host_redirects;
75 std::vector<GURL> url_redirects;
77 // URL of the referrer of this URL load.
78 GURL referrer;
80 // The HTTP status code from this navigation.
81 int http_status_code;
83 // The page ID of the navigation. This comes from FrameNavigateParams.
84 int32 page_id;
// Constructor and destructor are only declared here; their definitions live
// outside this header.
86 BrowseInfo();
87 ~BrowseInfo();
// Computes browser features for client-side phishing detection (see the
// file-level comment) and fills them into phishing / malware request objects.
90 // All methods of this class must be called on the UI thread (including
91 // the constructor).
92 class BrowserFeatureExtractor {
93 public:
94 // Called when feature extraction is done. The first argument will be
95 // true iff feature extraction succeeded. The second argument is the
96 // phishing request which was modified by the feature extractor. The
97 // DoneCallback takes ownership of the request object.
98 typedef base::Callback<void(bool, scoped_ptr<ClientPhishingRequest>)>
99 DoneCallback;
// Same signature shape as DoneCallback, but the second argument is a
// ClientMalwareRequest instead of a ClientPhishingRequest.
100 typedef base::Callback<void(bool, scoped_ptr<ClientMalwareRequest>)>
101 MalwareDoneCallback;
103 // The caller keeps ownership of the tab and host objects and is
104 // responsible for ensuring that they stay valid for the entire
105 // lifetime of this object.
106 BrowserFeatureExtractor(content::WebContents* tab,
107 ClientSideDetectionHost* host);
109 // The destructor will cancel any pending requests.
110 virtual ~BrowserFeatureExtractor();
112 // Begins extraction of the browser features. We take ownership
113 // of the request object until |callback| is called (see DoneCallback above)
114 // and will write the extracted features to the feature map. Once the
115 // feature extraction is complete, |callback| is run on the UI thread. We
116 // take ownership of the |callback| object. |info| may not be valid after
117 // ExtractFeatures returns. This method must run on the UI thread.
118 virtual void ExtractFeatures(const BrowseInfo* info,
119 ClientPhishingRequest* request,
120 const DoneCallback& callback);
122 // Begins extraction of the malware related features. We take ownership
123 // of the request object until |callback| is called. Once feature extraction
124 // is complete, |callback| will run on the UI thread. |info| is not expected
125 // to stay valid after ExtractMalwareFeatures returns. All IPs stored in
126 // |info| will be cleared by calling this function.
// Note that |info| is non-const here (unlike in ExtractFeatures), matching
// the statement above that its stored IPs are cleared.
127 virtual void ExtractMalwareFeatures(BrowseInfo* info,
128 ClientMalwareRequest* request,
129 const MalwareDoneCallback& callback);
131 private:
132 // Synchronous browser feature extraction.
133 void ExtractBrowseInfoFeatures(const BrowseInfo& info,
134 ClientPhishingRequest* request);
136 // Actually starts feature extraction (does the real work).
137 void StartExtractFeatures(scoped_ptr<ClientPhishingRequest> request,
138 const DoneCallback& callback);
140 // HistoryService callback which is called when we're done querying URL visits
141 // in the history.
142 void QueryUrlHistoryDone(scoped_ptr<ClientPhishingRequest> request,
143 const DoneCallback& callback,
144 bool success,
145 const history::URLRow& row,
146 const history::VisitVector& visits);
148 // HistoryService callback which is called when we're done querying HTTP host
149 // visits in the history.
150 void QueryHttpHostVisitsDone(scoped_ptr<ClientPhishingRequest> request,
151 const DoneCallback& callback,
152 bool success,
153 int num_visits,
154 base::Time first_visit);
156 // HistoryService callback which is called when we're done querying HTTPS host
157 // visits in the history.
158 void QueryHttpsHostVisitsDone(scoped_ptr<ClientPhishingRequest> request,
159 const DoneCallback& callback,
160 bool success,
161 int num_visits,
162 base::Time first_visit);
164 // Helper function which sets the host history features given the
165 // number of host visits and the time of the first host visit. Set
166 // |is_http_query| to true if the URL scheme is HTTP and to false if
167 // the scheme is HTTPS.
168 void SetHostVisitsFeatures(int num_visits,
169 base::Time first_visit,
170 bool is_http_query,
171 ClientPhishingRequest* request);
173 // Helper function which gets the history service if possible. Returns true
174 // if the |history| out-pointer was set and false otherwise.
175 bool GetHistoryService(HistoryService** history);
177 // Helper function which is called when we're done filtering out benign IPs
178 // on the IO thread. This function is called on the UI thread.
179 void FinishExtractMalwareFeatures(scoped_ptr<IPUrlMap> bad_ips,
180 MalwareDoneCallback callback,
181 scoped_ptr<ClientMalwareRequest> request);
// Not owned. Per the constructor comment, the caller must keep both objects
// valid for the entire lifetime of this extractor.
183 content::WebContents* tab_;
184 ClientSideDetectionHost* host_;
// NOTE(review): presumably tracks the outstanding history queries so that the
// destructor can cancel them -- confirm in the .cc file.
185 base::CancelableTaskTracker cancelable_task_tracker_;
// NOTE(review): base::WeakPtrFactory is declared in base/memory/weak_ptr.h,
// which this header does not include directly; presumably it arrives through
// another include -- confirm. Declared as the last data member.
186 base::WeakPtrFactory<BrowserFeatureExtractor> weak_factory_;
188 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor);
191 } // namespace safe_browsing
192 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_