1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // BrowserFeatureExtractor computes various browser features for client-side
6 // phishing detection. For now it does a bunch of lookups in the history
7 // service to see whether a particular URL has been visited before by the user.
10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
19 #include "base/basictypes.h"
20 #include "base/callback.h"
21 #include "base/containers/hash_tables.h"
22 #include "base/memory/scoped_ptr.h"
23 #include "base/task/cancelable_task_tracker.h"
24 #include "base/time/time.h"
25 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
26 #include "chrome/browser/safe_browsing/ui_manager.h"
27 #include "components/history/core/browser/history_types.h"
28 #include "content/public/common/resource_type.h"
40 namespace safe_browsing
{
41 class ClientMalwareRequest
;
42 class ClientPhishingRequest
;
43 class ClientSideDetectionHost
;
46 // The url on the bad IP address.
50 content::ResourceType resource_type
;
// Constructs an IPUrlInfo from |url|, |method|, |referrer| and
// |resource_type|, all taken by const reference.
// NOTE(review): presumably each argument is copied into the member of the
// same name -- confirm against the definition.
52 IPUrlInfo(const std::string
& url
,
53 const std::string
& method
,
54 const std::string
& referrer
,
55 const content::ResourceType
& resource_type
);
// Ordered map from a string key to the list of IPUrlInfo entries stored
// under that key.
// NOTE(review): the key is presumably an IP address string -- confirm.
59 typedef std::map
<std::string
, std::vector
<IPUrlInfo
> > IPUrlMap
;
62 // The URL we're currently browsing.
65 // List of IPv4 and IPv6 addresses from which content was requested
66 // together with the hosts on it, while browsing to the |url|.
69 // If a SafeBrowsing interstitial was shown for the current URL
70 // this will contain the UnsafeResource struct for that URL.
71 scoped_ptr
<SafeBrowsingUIManager::UnsafeResource
> unsafe_resource
;
73 // List of redirects that lead to the first page on the current host and
74 // the current url respectively. These may be the same if the current url
75 // is the first page on its host.
76 std::vector
<GURL
> host_redirects
;
77 std::vector
<GURL
> url_redirects
;
79 // URL of the referrer of this URL load.
82 // The HTTP status code from this navigation.
89 // All methods of this class must be called on the UI thread.
91 class BrowserFeatureExtractor
{
93 // Called when feature extraction is done. The first argument will be
94 // true iff feature extraction succeeded. The second argument is the
95 // phishing request which was modified by the feature extractor. The
96 // DoneCallback takes ownership of the request object.
97 typedef base::Callback
<void(bool, scoped_ptr
<ClientPhishingRequest
>)>
99 typedef base::Callback
<void(bool, scoped_ptr
<ClientMalwareRequest
>)>
102 // Creates an extractor for |tab| and |host|. Neither pointer is owned: the
103 // caller keeps ownership of the tab and host objects and is responsible
104 // for ensuring that they stay valid for the entire lifetime of this object.
105 BrowserFeatureExtractor(content::WebContents
* tab
,
106 ClientSideDetectionHost
* host
);
108 // The destructor will cancel any pending requests. It is declared virtual,
// like ExtractFeatures and ExtractMalwareFeatures, so the class can be
// subclassed.
109 virtual ~BrowserFeatureExtractor();
111 // Begins extraction of the browser features. Completion is reported
112 // through |callback| rather than through a return value.
113 //   |info|     - browse information to extract from; it may not be valid
114 //                after ExtractFeatures returns.
115 //   |request|  - we take ownership of the request object until |callback|
116 //                is called (see DoneCallback above) and will write the
//                extracted features to the feature map.
//   |callback| - run on the UI thread once the feature extraction is
//                complete. We take ownership of the |callback| object.
// This method must run on the UI thread.
117 virtual void ExtractFeatures(const BrowseInfo
* info
,
118 ClientPhishingRequest
* request
,
119 const DoneCallback
& callback
);
121 // Begins extraction of the malware related features.
122 //   |info|     - not expected to stay valid after ExtractMalwareFeatures
123 //                returns. Unlike ExtractFeatures, it is taken by non-const
124 //                pointer: all IPs stored in |info| will be cleared by
125 //                calling this function.
//   |request|  - we take ownership of the request object until |callback|
//                is called.
//   |callback| - will run on the UI thread once feature extraction is
//                complete.
126 virtual void ExtractMalwareFeatures(BrowseInfo
* info
,
127 ClientMalwareRequest
* request
,
128 const MalwareDoneCallback
& callback
);
131 // Synchronous browser feature extraction. |info| is read-only (const
// reference); |request| is passed as a mutable pointer.
// NOTE(review): presumably features derived from |info| are written into
// |request| before this returns -- confirm against the implementation.
132 void ExtractBrowseInfoFeatures(const BrowseInfo
& info
,
133 ClientPhishingRequest
* request
);
135 // Actually starts feature extraction (does the real work). |request| is
// passed as a scoped_ptr, so ownership of it moves into this call;
// |callback| is the DoneCallback associated with that request.
// NOTE(review): presumably |callback| is eventually run with |request| once
// extraction finishes or fails -- confirm against the implementation.
136 void StartExtractFeatures(scoped_ptr
<ClientPhishingRequest
> request
,
137 const DoneCallback
& callback
);
139 // HistoryService callback which is called when we're done querying URL visits
// in the history. |request| (owned, passed as a scoped_ptr) and |callback|
// identify the extraction this query belongs to.
// NOTE(review): |row| and |visits| are presumably the history row and visit
// list returned by the query for the URL -- confirm against the caller.
141 void QueryUrlHistoryDone(scoped_ptr
<ClientPhishingRequest
> request
,
142 const DoneCallback
& callback
,
144 const history::URLRow
& row
,
145 const history::VisitVector
& visits
);
147 // HistoryService callback which is called when we're done querying HTTP host
148 // visits in the history. |request| (owned, passed as a scoped_ptr) and
// |callback| identify the extraction this query belongs to.
// NOTE(review): |first_visit| is presumably the time of the first visit to
// the host over HTTP (compare SetHostVisitsFeatures) -- confirm.
149 void QueryHttpHostVisitsDone(scoped_ptr
<ClientPhishingRequest
> request
,
150 const DoneCallback
& callback
,
153 base::Time first_visit
);
155 // HistoryService callback which is called when we're done querying HTTPS host
156 // visits in the history. |request| (owned, passed as a scoped_ptr) and
// |callback| identify the extraction this query belongs to.
// NOTE(review): |first_visit| is presumably the time of the first visit to
// the host over HTTPS (compare SetHostVisitsFeatures) -- confirm.
157 void QueryHttpsHostVisitsDone(scoped_ptr
<ClientPhishingRequest
> request
,
158 const DoneCallback
& callback
,
161 base::Time first_visit
);
163 // Helper function which sets the host history features given the
164 // number of host visits and the time of the first host visit. Set
165 // |is_http_query| to true if the URL scheme is HTTP and to false if
166 // the scheme is HTTPS.
// NOTE(review): the features are presumably written into |request| --
// confirm against the implementation.
167 void SetHostVisitsFeatures(int num_visits
,
168 base::Time first_visit
,
170 ClientPhishingRequest
* request
);
172 // Helper function which gets the history service if possible. |history| is
173 // an out-parameter: returns true if the pointer was set, false otherwise.
174 bool GetHistoryService(history::HistoryService
** history
);
176 // Helper function which is called when we're done filtering out benign IPs
177 // on the IO thread. This function is called on the UI thread. |bad_ips| and
// |request| are passed as scoped_ptr, so ownership of both moves into this
// call; |callback| is taken by value.
// NOTE(review): |bad_ips| presumably holds the entries that remain after the
// benign IPs were filtered out -- confirm against the caller.
178 void FinishExtractMalwareFeatures(scoped_ptr
<IPUrlMap
> bad_ips
,
179 MalwareDoneCallback callback
,
180 scoped_ptr
<ClientMalwareRequest
> request
);
// Raw pointers to the tab and host objects. Neither is owned: per the
// constructor's contract the caller keeps them valid for the lifetime of
// this object.
182 content::WebContents
* tab_
;
183 ClientSideDetectionHost
* host_
;
// NOTE(review): presumably tracks the outstanding HistoryService queries so
// that they can be cancelled when this object is destroyed -- confirm.
184 base::CancelableTaskTracker cancelable_task_tracker_
;
// Factory for weak pointers to this object.
// NOTE(review): presumably used when binding callbacks that may run after
// this object has been destroyed -- confirm.
185 base::WeakPtrFactory
<BrowserFeatureExtractor
> weak_factory_
;
// Instances can be neither copied nor assigned.
187 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor
);
190 } // namespace safe_browsing
191 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_