NaCl docs: add sanitizers to GSoC ideas
[chromium-blink-merge.git] / chrome / browser / safe_browsing / malware_details.cc
blob30cecd85fbbf86640c9aa8b590e6c00e93b8eda0
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Implementation of the MalwareDetails class.
7 #include "chrome/browser/safe_browsing/malware_details.h"
9 #include "base/bind.h"
10 #include "base/lazy_instance.h"
11 #include "chrome/browser/profiles/profile.h"
12 #include "chrome/browser/safe_browsing/malware_details_cache.h"
13 #include "chrome/browser/safe_browsing/malware_details_history.h"
14 #include "chrome/browser/safe_browsing/report.pb.h"
15 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
16 #include "content/public/browser/browser_thread.h"
17 #include "content/public/browser/navigation_controller.h"
18 #include "content/public/browser/navigation_entry.h"
19 #include "content/public/browser/render_view_host.h"
20 #include "content/public/browser/web_contents.h"
21 #include "net/url_request/url_request_context_getter.h"
23 using content::BrowserThread;
24 using content::NavigationEntry;
25 using content::WebContents;
26 using safe_browsing::ClientMalwareReportRequest;
28 // Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details
29 static const uint32 kMaxDomNodes = 500;
31 // static
32 MalwareDetailsFactory* MalwareDetails::factory_ = NULL;
34 // The default MalwareDetailsFactory. Global, made a singleton so we
35 // don't leak it.
36 class MalwareDetailsFactoryImpl : public MalwareDetailsFactory {
37 public:
38 MalwareDetails* CreateMalwareDetails(
39 SafeBrowsingUIManager* ui_manager,
40 WebContents* web_contents,
41 const SafeBrowsingUIManager::UnsafeResource& unsafe_resource) override {
42 return new MalwareDetails(ui_manager, web_contents, unsafe_resource);
45 private:
46 friend struct base::DefaultLazyInstanceTraits<MalwareDetailsFactoryImpl>;
48 MalwareDetailsFactoryImpl() {}
50 DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl);
53 static base::LazyInstance<MalwareDetailsFactoryImpl>
54 g_malware_details_factory_impl = LAZY_INSTANCE_INITIALIZER;
56 // Create a MalwareDetails for the given tab.
57 /* static */
58 MalwareDetails* MalwareDetails::NewMalwareDetails(
59 SafeBrowsingUIManager* ui_manager,
60 WebContents* web_contents,
61 const UnsafeResource& resource) {
62 // Set up the factory if this has not been done already (tests do that
63 // before this method is called).
64 if (!factory_)
65 factory_ = g_malware_details_factory_impl.Pointer();
66 return factory_->CreateMalwareDetails(ui_manager, web_contents, resource);
69 // Create a MalwareDetails for the given tab. Runs in the UI thread.
70 MalwareDetails::MalwareDetails(
71 SafeBrowsingUIManager* ui_manager,
72 content::WebContents* web_contents,
73 const UnsafeResource& resource)
74 : content::WebContentsObserver(web_contents),
75 profile_(Profile::FromBrowserContext(web_contents->GetBrowserContext())),
76 request_context_getter_(profile_->GetRequestContext()),
77 ui_manager_(ui_manager),
78 resource_(resource),
79 cache_result_(false),
80 cache_collector_(new MalwareDetailsCacheCollector),
81 redirects_collector_(
82 new MalwareDetailsRedirectsCollector(profile_)) {
83 StartCollection();
86 MalwareDetails::~MalwareDetails() {
89 bool MalwareDetails::OnMessageReceived(const IPC::Message& message) {
90 bool handled = true;
91 IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message)
92 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails,
93 OnReceivedMalwareDOMDetails)
94 IPC_MESSAGE_UNHANDLED(handled = false)
95 IPC_END_MESSAGE_MAP()
96 return handled;
99 bool MalwareDetails::IsReportableUrl(const GURL& url) const {
100 // TODO(panayiotis): also skip internal urls.
101 return url.SchemeIs("http") || url.SchemeIs("https");
104 // Looks for a Resource for the given url in resources_. If found, it
105 // updates |resource|. Otherwise, it creates a new message, adds it to
106 // resources_ and updates |resource| to point to it.
108 ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource(
109 const GURL& url) {
110 safe_browsing::ResourceMap::iterator it = resources_.find(url.spec());
111 if (it != resources_.end())
112 return it->second.get();
114 // Create the resource for |url|.
115 int id = resources_.size();
116 linked_ptr<ClientMalwareReportRequest::Resource> new_resource(
117 new ClientMalwareReportRequest::Resource());
118 new_resource->set_url(url.spec());
119 new_resource->set_id(id);
120 resources_[url.spec()] = new_resource;
121 return new_resource.get();
124 void MalwareDetails::AddUrl(const GURL& url,
125 const GURL& parent,
126 const std::string& tagname,
127 const std::vector<GURL>* children) {
128 if (!url.is_valid() || !IsReportableUrl(url))
129 return;
131 // Find (or create) the resource for the url.
132 ClientMalwareReportRequest::Resource* url_resource =
133 FindOrCreateResource(url);
134 if (!tagname.empty())
135 url_resource->set_tag_name(tagname);
136 if (!parent.is_empty() && IsReportableUrl(parent)) {
137 // Add the resource for the parent.
138 ClientMalwareReportRequest::Resource* parent_resource =
139 FindOrCreateResource(parent);
140 // Update the parent-child relation
141 url_resource->set_parent_id(parent_resource->id());
143 if (children) {
144 for (std::vector<GURL>::const_iterator it = children->begin();
145 it != children->end(); ++it) {
146 ClientMalwareReportRequest::Resource* child_resource =
147 FindOrCreateResource(*it);
148 url_resource->add_child_ids(child_resource->id());
153 void MalwareDetails::StartCollection() {
154 DVLOG(1) << "Starting to compute malware details.";
155 report_.reset(new ClientMalwareReportRequest());
157 if (IsReportableUrl(resource_.url))
158 report_->set_malware_url(resource_.url.spec());
160 GURL page_url = web_contents()->GetURL();
161 if (IsReportableUrl(page_url))
162 report_->set_page_url(page_url.spec());
164 GURL referrer_url;
165 NavigationEntry* nav_entry = web_contents()->GetController().GetActiveEntry();
166 if (nav_entry) {
167 referrer_url = nav_entry->GetReferrer().url;
168 if (IsReportableUrl(referrer_url)) {
169 report_->set_referrer_url(referrer_url.spec());
173 // Add the nodes, starting from the page url.
174 AddUrl(page_url, GURL(), std::string(), NULL);
176 // Add the resource_url and its original url, if non-empty and different.
177 if (!resource_.original_url.is_empty() &&
178 resource_.url != resource_.original_url) {
179 // Add original_url, as the parent of resource_url.
180 AddUrl(resource_.original_url, GURL(), std::string(), NULL);
181 AddUrl(resource_.url, resource_.original_url, std::string(), NULL);
182 } else {
183 AddUrl(resource_.url, GURL(), std::string(), NULL);
186 // Add the redirect urls, if non-empty. The redirect urls do not include the
187 // original url, but include the unsafe url which is the last one of the
188 // redirect urls chain
189 GURL parent_url;
190 // Set the original url as the parent of the first redirect url if it's not
191 // empty.
192 if (!resource_.original_url.is_empty())
193 parent_url = resource_.original_url;
195 // Set the previous redirect url as the parent of the next one
196 for (size_t i = 0; i < resource_.redirect_urls.size(); ++i) {
197 AddUrl(resource_.redirect_urls[i], parent_url, std::string(), NULL);
198 parent_url = resource_.redirect_urls[i];
201 // Add the referrer url.
202 if (nav_entry && !referrer_url.is_empty())
203 AddUrl(referrer_url, GURL(), std::string(), NULL);
205 // Get URLs of frames, scripts etc from the DOM.
206 // OnReceivedMalwareDOMDetails will be called when the renderer replies.
207 content::RenderViewHost* view = web_contents()->GetRenderViewHost();
208 view->Send(new SafeBrowsingMsg_GetMalwareDOMDetails(view->GetRoutingID()));
211 // When the renderer is done, this is called.
212 void MalwareDetails::OnReceivedMalwareDOMDetails(
213 const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
214 // Schedule this in IO thread, so it doesn't conflict with future users
215 // of our data structures (eg GetSerializedReport).
216 BrowserThread::PostTask(
217 BrowserThread::IO, FROM_HERE,
218 base::Bind(&MalwareDetails::AddDOMDetails, this, params));
221 void MalwareDetails::AddDOMDetails(
222 const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
223 DCHECK_CURRENTLY_ON(BrowserThread::IO);
224 DVLOG(1) << "Nodes from the DOM: " << params.size();
226 // If we have already started getting redirects from history service,
227 // don't modify state, otherwise will invalidate the iterators.
228 if (redirects_collector_->HasStarted())
229 return;
231 // If we have already started collecting data from the HTTP cache, don't
232 // modify our state.
233 if (cache_collector_->HasStarted())
234 return;
236 // Add the urls from the DOM to |resources_|. The renderer could be
237 // sending bogus messages, so limit the number of nodes we accept.
238 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
239 SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i];
240 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
241 AddUrl(node.url, node.parent, node.tag_name, &(node.children));
245 // Called from the SB Service on the IO thread, after the user has
246 // closed the tab, or clicked proceed or goback. Since the user needs
247 // to take an action, we expect this to be called after
248 // OnReceivedMalwareDOMDetails in most cases. If not, we don't include
249 // the DOM data in our report.
250 void MalwareDetails::FinishCollection() {
251 DCHECK_CURRENTLY_ON(BrowserThread::IO);
253 std::vector<GURL> urls;
254 for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
255 it != resources_.end(); ++it) {
256 urls.push_back(GURL(it->first));
258 redirects_collector_->StartHistoryCollection(
259 urls,
260 base::Bind(&MalwareDetails::OnRedirectionCollectionReady, this));
263 void MalwareDetails::OnRedirectionCollectionReady() {
264 DCHECK_CURRENTLY_ON(BrowserThread::IO);
265 const std::vector<safe_browsing::RedirectChain>& redirects =
266 redirects_collector_->GetCollectedUrls();
268 for (size_t i = 0; i < redirects.size(); ++i)
269 AddRedirectUrlList(redirects[i]);
271 // Call the cache collector
272 cache_collector_->StartCacheCollection(
273 request_context_getter_.get(),
274 &resources_,
275 &cache_result_,
276 base::Bind(&MalwareDetails::OnCacheCollectionReady, this));
279 void MalwareDetails::AddRedirectUrlList(const std::vector<GURL>& urls) {
280 DCHECK_CURRENTLY_ON(BrowserThread::IO);
281 for (size_t i = 0; i < urls.size() - 1; ++i) {
282 AddUrl(urls[i], urls[i + 1], std::string(), NULL);
286 void MalwareDetails::OnCacheCollectionReady() {
287 DVLOG(1) << "OnCacheCollectionReady.";
288 // Add all the urls in our |resources_| maps to the |report_| protocol buffer.
289 for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
290 it != resources_.end(); ++it) {
291 ClientMalwareReportRequest::Resource* pb_resource =
292 report_->add_resources();
293 pb_resource->CopyFrom(*(it->second));
294 const GURL url(pb_resource->url());
295 if (url.SchemeIs("https")) {
296 // Don't report headers of HTTPS requests since they may contain private
297 // cookies. We still retain the full URL.
298 DVLOG(1) << "Clearing out HTTPS resource: " << pb_resource->url();
299 pb_resource->clear_request();
300 pb_resource->clear_response();
301 // Keep id, parent_id, child_ids, and tag_name.
304 report_->set_complete(cache_result_);
306 // Send the report, using the SafeBrowsingService.
307 std::string serialized;
308 if (!report_->SerializeToString(&serialized)) {
309 DLOG(ERROR) << "Unable to serialize the malware report.";
310 return;
313 ui_manager_->SendSerializedMalwareDetails(serialized);