1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Implementation of the MalwareDetails class.
7 #include "chrome/browser/safe_browsing/malware_details.h"
10 #include "base/lazy_instance.h"
11 #include "chrome/browser/net/chrome_url_request_context.h"
12 #include "chrome/browser/profiles/profile.h"
13 #include "chrome/browser/safe_browsing/malware_details_cache.h"
14 #include "chrome/browser/safe_browsing/malware_details_history.h"
15 #include "chrome/browser/safe_browsing/report.pb.h"
16 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
17 #include "content/public/browser/browser_thread.h"
18 #include "content/public/browser/navigation_controller.h"
19 #include "content/public/browser/navigation_entry.h"
20 #include "content/public/browser/render_view_host.h"
21 #include "content/public/browser/web_contents.h"
22 #include "net/base/io_buffer.h"
23 #include "net/disk_cache/disk_cache.h"
24 #include "net/url_request/url_request_context_getter.h"
26 using content::BrowserThread
;
27 using content::NavigationEntry
;
28 using content::WebContents
;
29 using safe_browsing::ClientMalwareReportRequest
;
31 // Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details
32 static const uint32 kMaxDomNodes
= 500;
35 MalwareDetailsFactory
* MalwareDetails::factory_
= NULL
;
37 // The default MalwareDetailsFactory: it builds plain MalwareDetails objects.
// NOTE(review): access specifiers and the closing lines of this class are not
// visible in this excerpt -- confirm against the full file before editing.
39 class MalwareDetailsFactoryImpl
40 : public MalwareDetailsFactory
{
// Factory hook: allocates a MalwareDetails with `new` from the three
// arguments and returns the raw pointer.
42 virtual MalwareDetails
* CreateMalwareDetails(
43 SafeBrowsingUIManager
* ui_manager
,
44 WebContents
* web_contents
,
45 const SafeBrowsingUIManager::UnsafeResource
& unsafe_resource
) OVERRIDE
{
46 return new MalwareDetails(ui_manager
, web_contents
, unsafe_resource
);
// Befriends the LazyInstance traits type, presumably so that it can reach the
// constructor declared below -- TODO confirm once the access specifiers are
// visible.
50 friend struct base::DefaultLazyInstanceTraits
<
51 MalwareDetailsFactoryImpl
>;
53 MalwareDetailsFactoryImpl() { }
55 DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl
);
58 static base::LazyInstance
<MalwareDetailsFactoryImpl
>
59 g_malware_details_factory_impl
= LAZY_INSTANCE_INITIALIZER
;
61 // Create a MalwareDetails for the given tab.
63 MalwareDetails
* MalwareDetails::NewMalwareDetails(
64 SafeBrowsingUIManager
* ui_manager
,
65 WebContents
* web_contents
,
66 const UnsafeResource
& resource
) {
67 // Set up the factory if this has not been done already (tests do that
68 // before this method is called).
70 factory_
= g_malware_details_factory_impl
.Pointer();
71 return factory_
->CreateMalwareDetails(ui_manager
, web_contents
, resource
);
74 // Create a MalwareDetails for the given tab. Runs in the UI thread.
// Registers this object as a WebContentsObserver of |web_contents|, derives
// |profile_| from the tab's browser context, takes the request context getter
// from that profile, stores |ui_manager| and allocates the cache collector.
// NOTE(review): the initializer list looks incomplete in this excerpt (the
// `new MalwareDetailsRedirectsCollector(profile_)` expression has no visible
// member name, |resource| is not visibly stored, and no constructor body is
// shown) -- confirm against the full file before editing.
75 MalwareDetails::MalwareDetails(
76 SafeBrowsingUIManager
* ui_manager
,
77 content::WebContents
* web_contents
,
78 const UnsafeResource
& resource
)
79 : content::WebContentsObserver(web_contents
),
80 profile_(Profile::FromBrowserContext(web_contents
->GetBrowserContext())),
81 request_context_getter_(profile_
->GetRequestContext()),
82 ui_manager_(ui_manager
),
85 cache_collector_(new MalwareDetailsCacheCollector
),
87 new MalwareDetailsRedirectsCollector(profile_
)) {
91 MalwareDetails::~MalwareDetails() {
94 bool MalwareDetails::OnMessageReceived(const IPC::Message
& message
) {
96 IPC_BEGIN_MESSAGE_MAP(MalwareDetails
, message
)
97 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails
,
98 OnReceivedMalwareDOMDetails
)
99 IPC_MESSAGE_UNHANDLED(handled
= false)
100 IPC_END_MESSAGE_MAP()
104 bool MalwareDetails::IsPublicUrl(const GURL
& url
) const {
105 return url
.SchemeIs("http"); // TODO(panayiotis): also skip internal urls.
108 // Looks for a Resource for the given url in resources_. If found, it
109 // updates |resource|. Otherwise, it creates a new message, adds it to
110 // resources_ and updates |resource| to point to it.
111 ClientMalwareReportRequest::Resource
* MalwareDetails::FindOrCreateResource(
113 safe_browsing::ResourceMap::iterator it
= resources_
.find(url
.spec());
114 if (it
!= resources_
.end()) {
115 return it
->second
.get();
118 // Create the resource for |url|.
119 int id
= resources_
.size();
120 linked_ptr
<ClientMalwareReportRequest::Resource
> new_resource(
121 new ClientMalwareReportRequest::Resource());
122 new_resource
->set_url(url
.spec());
123 new_resource
->set_id(id
);
124 resources_
[url
.spec()] = new_resource
;
125 return new_resource
.get();
128 void MalwareDetails::AddUrl(const GURL
& url
,
130 const std::string
& tagname
,
131 const std::vector
<GURL
>* children
) {
132 if (!url
.is_valid() || !IsPublicUrl(url
))
135 // Find (or create) the resource for the url.
136 ClientMalwareReportRequest::Resource
* url_resource
=
137 FindOrCreateResource(url
);
138 if (!tagname
.empty()) {
139 url_resource
->set_tag_name(tagname
);
141 if (!parent
.is_empty() && IsPublicUrl(parent
)) {
142 // Add the resource for the parent.
143 ClientMalwareReportRequest::Resource
* parent_resource
=
144 FindOrCreateResource(parent
);
145 // Update the parent-child relation
146 url_resource
->set_parent_id(parent_resource
->id());
149 for (std::vector
<GURL
>::const_iterator it
= children
->begin();
150 it
!= children
->end(); it
++) {
151 ClientMalwareReportRequest::Resource
* child_resource
=
152 FindOrCreateResource(*it
);
153 url_resource
->add_child_ids(child_resource
->id());
// Begins assembling the report. Creates |report_|, fills in the malware, page
// and referrer urls (each only when it passes IsPublicUrl()), registers the
// page url, the unsafe resource url, its original url, the redirect chain and
// the referrer through AddUrl(), and finally asks the renderer for the DOM
// details of the page.
// NOTE(review): in this excerpt |referrer_url| and |parent_url| are used
// without visible declarations and several closing braces / branch keywords
// are absent -- the block appears to have lost lines; confirm against the
// full file before changing any logic.
158 void MalwareDetails::StartCollection() {
159 DVLOG(1) << "Starting to compute malware details.";
160 report_
.reset(new ClientMalwareReportRequest());
162 if (IsPublicUrl(resource_
.url
)) {
163 report_
->set_malware_url(resource_
.url
.spec());
166 GURL page_url
= web_contents()->GetURL();
167 if (IsPublicUrl(page_url
)) {
168 report_
->set_page_url(page_url
.spec());
// NOTE(review): |nav_entry| is dereferenced just below, while a later check
// (`nav_entry && ...`) suggests it may be NULL -- confirm that a guard exists
// in the full file.
172 NavigationEntry
* nav_entry
= web_contents()->GetController().GetActiveEntry();
174 referrer_url
= nav_entry
->GetReferrer().url
;
175 if (IsPublicUrl(referrer_url
)) {
176 report_
->set_referrer_url(referrer_url
.spec());
180 // Add the nodes, starting from the page url.
181 AddUrl(page_url
, GURL(), std::string(), NULL
);
183 // Add the resource_url and its original url, if non-empty and different.
184 if (!resource_
.original_url
.is_empty() &&
185 resource_
.url
!= resource_
.original_url
) {
186 // Add original_url, as the parent of resource_url.
187 AddUrl(resource_
.original_url
, GURL(), std::string(), NULL
);
188 AddUrl(resource_
.url
, resource_
.original_url
, std::string(), NULL
);
// NOTE(review): the call below presumably belongs to an `else` branch (the
// resource url added without a parent) -- the keyword is not visible here.
190 AddUrl(resource_
.url
, GURL(), std::string(), NULL
);
193 // Add the redirect urls, if non-empty. The redirect urls do not include the
194 // original url, but include the unsafe url which is the last one of the
195 // redirect urls chain.
197 // Set the original url as the parent of the first redirect url if it's not
// empty.
199 if (!resource_
.original_url
.is_empty()) {
200 parent_url
= resource_
.original_url
;
202 // Set the previous redirect url as the parent of the next one
203 for (unsigned int i
= 0; i
< resource_
.redirect_urls
.size(); ++i
) {
204 AddUrl(resource_
.redirect_urls
[i
], parent_url
, std::string(), NULL
);
205 parent_url
= resource_
.redirect_urls
[i
];
208 // Add the referrer url.
209 if (nav_entry
&& !referrer_url
.is_empty()) {
210 AddUrl(referrer_url
, GURL(), std::string(), NULL
);
213 // Get URLs of frames, scripts etc from the DOM.
214 // OnReceivedMalwareDOMDetails will be called when the renderer replies.
// NOTE(review): |view| is used without a visible NULL check -- confirm that
// GetRenderViewHost() cannot return NULL at this point.
215 content::RenderViewHost
* view
= web_contents()->GetRenderViewHost();
216 view
->Send(new SafeBrowsingMsg_GetMalwareDOMDetails(view
->GetRoutingID()));
219 // When the renderer is done, this is called.
220 void MalwareDetails::OnReceivedMalwareDOMDetails(
221 const std::vector
<SafeBrowsingHostMsg_MalwareDOMDetails_Node
>& params
) {
222 // Schedule this in IO thread, so it doesn't conflict with future users
223 // of our data structures (eg GetSerializedReport).
224 BrowserThread::PostTask(
225 BrowserThread::IO
, FROM_HERE
,
226 base::Bind(&MalwareDetails::AddDOMDetails
, this, params
));
229 void MalwareDetails::AddDOMDetails(
230 const std::vector
<SafeBrowsingHostMsg_MalwareDOMDetails_Node
>& params
) {
231 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
232 DVLOG(1) << "Nodes from the DOM: " << params
.size();
234 // If we have already started getting redirects from history service,
235 // don't modify state, otherwise will invalidate the iterators.
236 if (redirects_collector_
->HasStarted())
239 // If we have already started collecting data from the HTTP cache, don't
241 if (cache_collector_
->HasStarted())
244 // Add the urls from the DOM to |resources_|. The renderer could be
245 // sending bogus messages, so limit the number of nodes we accept.
246 for (uint32 i
= 0; i
< params
.size() && i
< kMaxDomNodes
; ++i
) {
247 SafeBrowsingHostMsg_MalwareDOMDetails_Node node
= params
[i
];
248 DVLOG(1) << node
.url
<< ", " << node
.tag_name
<< ", " << node
.parent
;
249 AddUrl(node
.url
, node
.parent
, node
.tag_name
, &(node
.children
));
253 // Called from the SB Service on the IO thread, after the user has
254 // closed the tab, or clicked proceed or goback. Since the user needs
255 // to take an action, we expect this to be called after
256 // OnReceivedMalwareDOMDetails in most cases. If not, we don't include
257 // the DOM data in our report.
// Copies every url key of |resources_| into a vector and starts the history
// (redirects) collection, binding OnRedirectionCollectionReady() as the
// callback.
// NOTE(review): |urls| is built but not visibly passed to
// StartHistoryCollection() in this excerpt -- an argument line appears to be
// missing; confirm against the full file.
258 void MalwareDetails::FinishCollection() {
259 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
261 std::vector
<GURL
> urls
;
262 for (safe_browsing::ResourceMap::const_iterator it
= resources_
.begin();
263 it
!= resources_
.end(); it
++) {
264 urls
.push_back(GURL(it
->first
));
266 redirects_collector_
->StartHistoryCollection(
268 base::Bind(&MalwareDetails::OnRedirectionCollectionReady
, this));
// Bound in FinishCollection() as the callback passed to
// StartHistoryCollection(). Runs on the IO thread (DCHECKed). Feeds every
// collected redirect chain to AddRedirectUrlList() and then starts the cache
// collection, binding OnCacheCollectionReady() as its callback.
// NOTE(review): only the first and last arguments of StartCacheCollection()
// are visible in this excerpt -- confirm the full argument list in the file.
271 void MalwareDetails::OnRedirectionCollectionReady() {
272 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
273 const std::vector
<safe_browsing::RedirectChain
>& redirects
=
274 redirects_collector_
->GetCollectedUrls();
276 for (size_t i
= 0; i
< redirects
.size(); ++i
)
277 AddRedirectUrlList(redirects
[i
]);
279 // Call the cache collector
280 cache_collector_
->StartCacheCollection(
281 request_context_getter_
.get(),
284 base::Bind(&MalwareDetails::OnCacheCollectionReady
, this));
287 void MalwareDetails::AddRedirectUrlList(const std::vector
<GURL
>& urls
) {
288 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
289 for (size_t i
= 0; i
< urls
.size()-1; ++i
) {
290 AddUrl(urls
[i
], urls
[i
+ 1], std::string(), NULL
);
294 void MalwareDetails::OnCacheCollectionReady() {
295 DVLOG(1) << "OnCacheCollectionReady.";
296 // Add all the urls in our |resources_| maps to the |report_| protocol buffer.
297 for (safe_browsing::ResourceMap::const_iterator it
= resources_
.begin();
298 it
!= resources_
.end(); it
++) {
299 ClientMalwareReportRequest::Resource
* pb_resource
=
300 report_
->add_resources();
301 pb_resource
->CopyFrom(*(it
->second
));
304 report_
->set_complete(cache_result_
);
306 // Send the report, using the SafeBrowsingService.
307 std::string serialized
;
308 if (!report_
->SerializeToString(&serialized
)) {
309 DLOG(ERROR
) << "Unable to serialize the malware report.";
313 ui_manager_
->SendSerializedMalwareDetails(serialized
);