Pin Chrome's shortcut to the Win10 Start menu on install and OS upgrade.
[chromium-blink-merge.git] / content / child / site_isolation_stats_gatherer.cc
blobc51640eb393ac9343c0b066e2ea36e7ae6b66613
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "content/child/site_isolation_stats_gatherer.h"
7 #include "base/metrics/histogram.h"
8 #include "base/strings/string_piece.h"
9 #include "base/strings/string_util.h"
10 #include "content/public/common/resource_response_info.h"
11 #include "net/http/http_response_headers.h"
13 namespace content {
15 namespace {
// Master switch for site-isolation UMA collection. It starts out off and is
// only turned on in renderer processes; in this file the sole writer is
// SiteIsolationStatsGatherer::SetEnabled().
bool g_stats_gathering_enabled = false;
// Returns true when |status_code| is one of the HTTP status codes for which
// Chrome actually uses the response body as CSS/JavaScript. Images are not
// covered by this check: Chrome ignores the status code for them.
bool IsRenderableStatusCode(int status_code) {
  switch (status_code) {
    case 200:
    case 201:
    case 202:
    case 203:
    case 206:
    case 300:
    case 301:
    case 302:
    case 303:
    case 305:
    case 306:
    case 307:
      return true;
    default:
      return false;
  }
}
33 void IncrementHistogramCount(const std::string& name) {
34 // The default value of min, max, bucket_count are copied from histogram.h.
35 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet(
36 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag);
37 histogram_pointer->Add(1);
40 void IncrementHistogramEnum(const std::string& name,
41 uint32 sample,
42 uint32 boundary_value) {
43 // The default value of min, max, bucket_count are copied from histogram.h.
44 base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet(
45 name, 1, boundary_value, boundary_value + 1,
46 base::HistogramBase::kUmaTargetedHistogramFlag);
47 histogram_pointer->Add(sample);
50 void HistogramCountBlockedResponse(
51 const std::string& bucket_prefix,
52 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,
53 bool nosniff_block) {
54 std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked");
55 IncrementHistogramCount(bucket_prefix + block_label);
57 // The content is blocked if it is sniffed as HTML/JSON/XML. When
58 // the blocked response is with an error status code, it is not
59 // disruptive for the following reasons : 1) the blocked content is
60 // not a binary object (such as an image) since it is sniffed as
61 // text; 2) then, this blocking only breaks the renderer behavior
62 // only if it is either JavaScript or CSS. However, the renderer
63 // doesn't use the contents of JS/CSS with unaffected status code
64 // (e.g, 404). 3) the renderer is expected not to use the cross-site
65 // document content for purposes other than JS/CSS (e.g, XHR).
66 bool renderable_status_code =
67 IsRenderableStatusCode(resp_data->http_status_code);
69 if (renderable_status_code) {
70 IncrementHistogramEnum(
71 bucket_prefix + block_label + ".RenderableStatusCode",
72 resp_data->resource_type, RESOURCE_TYPE_LAST_TYPE);
73 } else {
74 IncrementHistogramCount(bucket_prefix + block_label +
75 ".NonRenderableStatusCode");
79 void HistogramCountNotBlockedResponse(const std::string& bucket_prefix,
80 bool sniffed_as_js) {
81 IncrementHistogramCount(bucket_prefix + ".NotBlocked");
82 if (sniffed_as_js)
83 IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS");
86 } // namespace
88 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {
91 void SiteIsolationStatsGatherer::SetEnabled(bool enabled) {
92 g_stats_gathering_enabled = enabled;
95 linked_ptr<SiteIsolationResponseMetaData>
96 SiteIsolationStatsGatherer::OnReceivedResponse(
97 const GURL& frame_origin,
98 const GURL& response_url,
99 ResourceType resource_type,
100 int origin_pid,
101 const ResourceResponseInfo& info) {
102 if (!g_stats_gathering_enabled)
103 return linked_ptr<SiteIsolationResponseMetaData>();
105 // if |origin_pid| is non-zero, it means that this response is for a plugin
106 // spawned from this renderer process. We exclude responses for plugins for
107 // now, but eventually, we're going to make plugin processes directly talk to
108 // the browser process so that we don't apply cross-site document blocking to
109 // them.
110 if (origin_pid)
111 return linked_ptr<SiteIsolationResponseMetaData>();
113 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);
115 // See if this is for navigation. If it is, don't block it, under the
116 // assumption that we will put it in an appropriate process.
117 if (IsResourceTypeFrame(resource_type))
118 return linked_ptr<SiteIsolationResponseMetaData>();
120 if (!CrossSiteDocumentClassifier::IsBlockableScheme(response_url))
121 return linked_ptr<SiteIsolationResponseMetaData>();
123 if (CrossSiteDocumentClassifier::IsSameSite(frame_origin, response_url))
124 return linked_ptr<SiteIsolationResponseMetaData>();
126 CrossSiteDocumentMimeType canonical_mime_type =
127 CrossSiteDocumentClassifier::GetCanonicalMimeType(info.mime_type);
129 if (canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS)
130 return linked_ptr<SiteIsolationResponseMetaData>();
132 // Every CORS request should have the Access-Control-Allow-Origin header even
133 // if it is preceded by a pre-flight request. Therefore, if this is a CORS
134 // request, it has this header. response.httpHeaderField() internally uses
135 // case-insensitive matching for the header name.
136 std::string access_control_origin;
138 // We can use a case-insensitive header name for EnumerateHeader().
139 info.headers->EnumerateHeader(NULL, "access-control-allow-origin",
140 &access_control_origin);
141 if (CrossSiteDocumentClassifier::IsValidCorsHeaderSet(
142 frame_origin, response_url, access_control_origin))
143 return linked_ptr<SiteIsolationResponseMetaData>();
145 // Real XSD data collection starts from here.
146 std::string no_sniff;
147 info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff);
149 linked_ptr<SiteIsolationResponseMetaData> resp_data(
150 new SiteIsolationResponseMetaData);
151 resp_data->frame_origin = frame_origin.spec();
152 resp_data->response_url = response_url;
153 resp_data->resource_type = resource_type;
154 resp_data->canonical_mime_type = canonical_mime_type;
155 resp_data->http_status_code = info.headers->response_code();
156 resp_data->no_sniff = base::LowerCaseEqualsASCII(no_sniff, "nosniff");
158 return resp_data;
161 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(
162 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,
163 const char* raw_data,
164 int raw_length) {
165 if (!g_stats_gathering_enabled)
166 return false;
168 DCHECK(resp_data.get());
170 base::StringPiece data(raw_data, raw_length);
172 // Record the length of the first received chunk of data to see if it's enough
173 // for sniffing.
174 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length);
176 // Record the number of cross-site document responses with a specific mime
177 // type (text/html, text/xml, etc).
178 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType",
179 resp_data->canonical_mime_type,
180 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX);
182 // Store the result of cross-site document blocking analysis.
183 bool would_block = false;
184 bool sniffed_as_js = SniffForJS(data);
186 // Record the number of responses whose content is sniffed for what its mime
187 // type claims it to be. For example, we apply a HTML sniffer for a document
188 // tagged with text/html here. Whenever this check becomes true, we'll block
189 // the response.
190 if (resp_data->canonical_mime_type != CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN) {
191 std::string bucket_prefix;
192 bool sniffed_as_target_document = false;
193 if (resp_data->canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_HTML) {
194 bucket_prefix = "SiteIsolation.XSD.HTML";
195 sniffed_as_target_document =
196 CrossSiteDocumentClassifier::SniffForHTML(data);
197 } else if (resp_data->canonical_mime_type ==
198 CROSS_SITE_DOCUMENT_MIME_TYPE_XML) {
199 bucket_prefix = "SiteIsolation.XSD.XML";
200 sniffed_as_target_document =
201 CrossSiteDocumentClassifier::SniffForXML(data);
202 } else if (resp_data->canonical_mime_type ==
203 CROSS_SITE_DOCUMENT_MIME_TYPE_JSON) {
204 bucket_prefix = "SiteIsolation.XSD.JSON";
205 sniffed_as_target_document =
206 CrossSiteDocumentClassifier::SniffForJSON(data);
207 } else {
208 NOTREACHED() << "Not a blockable mime type: "
209 << resp_data->canonical_mime_type;
212 if (sniffed_as_target_document) {
213 would_block = true;
214 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
215 } else {
216 if (resp_data->no_sniff) {
217 would_block = true;
218 HistogramCountBlockedResponse(bucket_prefix, resp_data, true);
219 } else {
220 HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js);
223 } else {
224 // This block is for plain text documents. We apply our HTML, XML,
225 // and JSON sniffer to a text document in the order, and block it
226 // if any of them succeeds in sniffing.
227 std::string bucket_prefix;
228 if (CrossSiteDocumentClassifier::SniffForHTML(data))
229 bucket_prefix = "SiteIsolation.XSD.Plain.HTML";
230 else if (CrossSiteDocumentClassifier::SniffForXML(data))
231 bucket_prefix = "SiteIsolation.XSD.Plain.XML";
232 else if (CrossSiteDocumentClassifier::SniffForJSON(data))
233 bucket_prefix = "SiteIsolation.XSD.Plain.JSON";
235 if (bucket_prefix.size() > 0) {
236 would_block = true;
237 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
238 } else if (resp_data->no_sniff) {
239 would_block = true;
240 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true);
241 } else {
242 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",
243 sniffed_as_js);
247 return would_block;
250 bool SiteIsolationStatsGatherer::SniffForJS(base::StringPiece data) {
251 // The purpose of this function is to try to see if there's any possibility
252 // that this data can be JavaScript (superset of JS). Search for "var " for JS
253 // detection. This is a real hack and should only be used for stats gathering.
254 return data.find("var ") != base::StringPiece::npos;
257 } // namespace content