// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "content/child/site_isolation_stats_gatherer.h"
7 #include "base/metrics/histogram.h"
8 #include "base/strings/string_piece.h"
9 #include "base/strings/string_util.h"
10 #include "content/public/common/resource_response_info.h"
11 #include "net/http/http_response_headers.h"
// The gathering of UMA stats for site isolation is deactivated by default, and
// only activated in renderer processes. The flag is flipped through
// SiteIsolationStatsGatherer::SetEnabled() and consulted at the top of the
// OnReceived*() entry points.
static bool g_stats_gathering_enabled = false;
// Returns true when |status_code| is one of the HTTP status codes for which
// Chrome uses the content of a response as CSS/JavaScript. For images, Chrome
// just ignores the status code.
bool IsRenderableStatusCode(int status_code) {
  switch (status_code) {
    case 200:
    case 201:
    case 202:
    case 203:
    case 206:
    case 300:
    case 301:
    case 302:
    case 303:
    case 305:
    case 306:
    case 307:
      return true;
    default:
      return false;
  }
}
33 void IncrementHistogramCount(const std::string
& name
) {
34 // The default value of min, max, bucket_count are copied from histogram.h.
35 base::HistogramBase
* histogram_pointer
= base::Histogram::FactoryGet(
36 name
, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag
);
37 histogram_pointer
->Add(1);
40 void IncrementHistogramEnum(const std::string
& name
,
42 uint32 boundary_value
) {
43 // The default value of min, max, bucket_count are copied from histogram.h.
44 base::HistogramBase
* histogram_pointer
= base::LinearHistogram::FactoryGet(
45 name
, 1, boundary_value
, boundary_value
+ 1,
46 base::HistogramBase::kUmaTargetedHistogramFlag
);
47 histogram_pointer
->Add(sample
);
50 void HistogramCountBlockedResponse(
51 const std::string
& bucket_prefix
,
52 const linked_ptr
<SiteIsolationResponseMetaData
>& resp_data
,
54 std::string
block_label(nosniff_block
? ".NoSniffBlocked" : ".Blocked");
55 IncrementHistogramCount(bucket_prefix
+ block_label
);
57 // The content is blocked if it is sniffed as HTML/JSON/XML. When
58 // the blocked response is with an error status code, it is not
59 // disruptive for the following reasons : 1) the blocked content is
60 // not a binary object (such as an image) since it is sniffed as
61 // text; 2) then, this blocking only breaks the renderer behavior
62 // only if it is either JavaScript or CSS. However, the renderer
63 // doesn't use the contents of JS/CSS with unaffected status code
64 // (e.g, 404). 3) the renderer is expected not to use the cross-site
65 // document content for purposes other than JS/CSS (e.g, XHR).
66 bool renderable_status_code
=
67 IsRenderableStatusCode(resp_data
->http_status_code
);
69 if (renderable_status_code
) {
70 IncrementHistogramEnum(
71 bucket_prefix
+ block_label
+ ".RenderableStatusCode",
72 resp_data
->resource_type
, RESOURCE_TYPE_LAST_TYPE
);
74 IncrementHistogramCount(bucket_prefix
+ block_label
+
75 ".NonRenderableStatusCode");
79 void HistogramCountNotBlockedResponse(const std::string
& bucket_prefix
,
81 IncrementHistogramCount(bucket_prefix
+ ".NotBlocked");
83 IncrementHistogramCount(bucket_prefix
+ ".NotBlocked.MaybeJS");
88 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {
91 void SiteIsolationStatsGatherer::SetEnabled(bool enabled
) {
92 g_stats_gathering_enabled
= enabled
;
95 linked_ptr
<SiteIsolationResponseMetaData
>
96 SiteIsolationStatsGatherer::OnReceivedResponse(
97 const GURL
& frame_origin
,
98 const GURL
& response_url
,
99 ResourceType resource_type
,
101 const ResourceResponseInfo
& info
) {
102 if (!g_stats_gathering_enabled
)
103 return linked_ptr
<SiteIsolationResponseMetaData
>();
105 // if |origin_pid| is non-zero, it means that this response is for a plugin
106 // spawned from this renderer process. We exclude responses for plugins for
107 // now, but eventually, we're going to make plugin processes directly talk to
108 // the browser process so that we don't apply cross-site document blocking to
111 return linked_ptr
<SiteIsolationResponseMetaData
>();
113 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);
115 // See if this is for navigation. If it is, don't block it, under the
116 // assumption that we will put it in an appropriate process.
117 if (IsResourceTypeFrame(resource_type
))
118 return linked_ptr
<SiteIsolationResponseMetaData
>();
120 if (!CrossSiteDocumentClassifier::IsBlockableScheme(response_url
))
121 return linked_ptr
<SiteIsolationResponseMetaData
>();
123 if (CrossSiteDocumentClassifier::IsSameSite(frame_origin
, response_url
))
124 return linked_ptr
<SiteIsolationResponseMetaData
>();
126 CrossSiteDocumentMimeType canonical_mime_type
=
127 CrossSiteDocumentClassifier::GetCanonicalMimeType(info
.mime_type
);
129 if (canonical_mime_type
== CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS
)
130 return linked_ptr
<SiteIsolationResponseMetaData
>();
132 // Every CORS request should have the Access-Control-Allow-Origin header even
133 // if it is preceded by a pre-flight request. Therefore, if this is a CORS
134 // request, it has this header. response.httpHeaderField() internally uses
135 // case-insensitive matching for the header name.
136 std::string access_control_origin
;
138 // We can use a case-insensitive header name for EnumerateHeader().
139 info
.headers
->EnumerateHeader(NULL
, "access-control-allow-origin",
140 &access_control_origin
);
141 if (CrossSiteDocumentClassifier::IsValidCorsHeaderSet(
142 frame_origin
, response_url
, access_control_origin
))
143 return linked_ptr
<SiteIsolationResponseMetaData
>();
145 // Real XSD data collection starts from here.
146 std::string no_sniff
;
147 info
.headers
->EnumerateHeader(NULL
, "x-content-type-options", &no_sniff
);
149 linked_ptr
<SiteIsolationResponseMetaData
> resp_data(
150 new SiteIsolationResponseMetaData
);
151 resp_data
->frame_origin
= frame_origin
.spec();
152 resp_data
->response_url
= response_url
;
153 resp_data
->resource_type
= resource_type
;
154 resp_data
->canonical_mime_type
= canonical_mime_type
;
155 resp_data
->http_status_code
= info
.headers
->response_code();
156 resp_data
->no_sniff
= base::LowerCaseEqualsASCII(no_sniff
, "nosniff");
161 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(
162 const linked_ptr
<SiteIsolationResponseMetaData
>& resp_data
,
163 const char* raw_data
,
165 if (!g_stats_gathering_enabled
)
168 DCHECK(resp_data
.get());
170 base::StringPiece
data(raw_data
, raw_length
);
172 // Record the length of the first received chunk of data to see if it's enough
174 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length
);
176 // Record the number of cross-site document responses with a specific mime
177 // type (text/html, text/xml, etc).
178 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType",
179 resp_data
->canonical_mime_type
,
180 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX
);
182 // Store the result of cross-site document blocking analysis.
183 bool would_block
= false;
184 bool sniffed_as_js
= SniffForJS(data
);
186 // Record the number of responses whose content is sniffed for what its mime
187 // type claims it to be. For example, we apply a HTML sniffer for a document
188 // tagged with text/html here. Whenever this check becomes true, we'll block
190 if (resp_data
->canonical_mime_type
!= CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN
) {
191 std::string bucket_prefix
;
192 bool sniffed_as_target_document
= false;
193 if (resp_data
->canonical_mime_type
== CROSS_SITE_DOCUMENT_MIME_TYPE_HTML
) {
194 bucket_prefix
= "SiteIsolation.XSD.HTML";
195 sniffed_as_target_document
=
196 CrossSiteDocumentClassifier::SniffForHTML(data
);
197 } else if (resp_data
->canonical_mime_type
==
198 CROSS_SITE_DOCUMENT_MIME_TYPE_XML
) {
199 bucket_prefix
= "SiteIsolation.XSD.XML";
200 sniffed_as_target_document
=
201 CrossSiteDocumentClassifier::SniffForXML(data
);
202 } else if (resp_data
->canonical_mime_type
==
203 CROSS_SITE_DOCUMENT_MIME_TYPE_JSON
) {
204 bucket_prefix
= "SiteIsolation.XSD.JSON";
205 sniffed_as_target_document
=
206 CrossSiteDocumentClassifier::SniffForJSON(data
);
208 NOTREACHED() << "Not a blockable mime type: "
209 << resp_data
->canonical_mime_type
;
212 if (sniffed_as_target_document
) {
214 HistogramCountBlockedResponse(bucket_prefix
, resp_data
, false);
216 if (resp_data
->no_sniff
) {
218 HistogramCountBlockedResponse(bucket_prefix
, resp_data
, true);
220 HistogramCountNotBlockedResponse(bucket_prefix
, sniffed_as_js
);
224 // This block is for plain text documents. We apply our HTML, XML,
225 // and JSON sniffer to a text document in the order, and block it
226 // if any of them succeeds in sniffing.
227 std::string bucket_prefix
;
228 if (CrossSiteDocumentClassifier::SniffForHTML(data
))
229 bucket_prefix
= "SiteIsolation.XSD.Plain.HTML";
230 else if (CrossSiteDocumentClassifier::SniffForXML(data
))
231 bucket_prefix
= "SiteIsolation.XSD.Plain.XML";
232 else if (CrossSiteDocumentClassifier::SniffForJSON(data
))
233 bucket_prefix
= "SiteIsolation.XSD.Plain.JSON";
235 if (bucket_prefix
.size() > 0) {
237 HistogramCountBlockedResponse(bucket_prefix
, resp_data
, false);
238 } else if (resp_data
->no_sniff
) {
240 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data
, true);
242 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",
250 bool SiteIsolationStatsGatherer::SniffForJS(base::StringPiece data
) {
251 // The purpose of this function is to try to see if there's any possibility
252 // that this data can be JavaScript (superset of JS). Search for "var " for JS
253 // detection. This is a real hack and should only be used for stats gathering.
254 return data
.find("var ") != base::StringPiece::npos
;
257 } // namespace content