[safe_browsing] Remove unused ContainsBrowseUrl() parameter.
[chromium-blink-merge.git] / content / child / site_isolation_policy.cc
blobd0f2ec4caaf832dc35aaa5e25c3e5feda7ab6f40
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/child/site_isolation_policy.h"
7 #include "base/basictypes.h"
8 #include "base/command_line.h"
9 #include "base/lazy_instance.h"
10 #include "base/logging.h"
11 #include "base/metrics/histogram.h"
12 #include "base/strings/string_util.h"
13 #include "content/public/common/content_switches.h"
14 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
15 #include "net/http/http_response_headers.h"
16 #include "webkit/common/resource_response_info.h"
18 using base::StringPiece;
20 namespace content {
22 namespace {
// Master switch for cross-site document blocking and its UMA data
// collection. It starts out off; SiteIsolationPolicy::SetPolicyEnabled()
// is the only writer, and the policy is meant to be turned on only in
// renderer processes.
static bool g_policy_enabled = false;
// MIME types recognized by the policy, grouped by the canonical class that
// SiteIsolationPolicy::GetCanonicalMimeType() assigns to them.

// HTML.
const char kTextHtml[] = "text/html";

// Plain text.
const char kTextPlain[] = "text/plain";

// JSON.
const char kAppJson[] = "application/json";
const char kTextJson[] = "text/json";
const char kTextXjson[] = "text/x-json";

// XML. (|xAppRssXml| keeps its existing name because
// GetCanonicalMimeType() refers to it.)
const char kTextXml[] = "text/xml";
const char xAppRssXml[] = "application/rss+xml";
const char kAppXml[] = "application/xml";
// Returns true when |status_code| is one of the HTTP status codes for which
// Chrome uses the response body as CSS/JavaScript. (For images Chrome
// ignores the status code altogether.)
// TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted
// when this class is used for actual blocking.
bool IsRenderableStatusCode(int status_code) {
  switch (status_code) {
    case 200:
    case 201:
    case 202:
    case 203:
    case 206:
    case 300:
    case 301:
    case 302:
    case 303:
    case 305:
    case 306:
    case 307:
      return true;
    default:
      return false;
  }
}
52 bool MatchesSignature(StringPiece data,
53 const StringPiece signatures[],
54 size_t arr_size) {
56 size_t offset = data.find_first_not_of(" \t\r\n");
57 // There is no not-whitespace character in this document.
58 if (offset == base::StringPiece::npos)
59 return false;
61 data.remove_prefix(offset);
62 size_t length = data.length();
64 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {
65 const StringPiece& signature = signatures[sig_index];
66 size_t signature_length = signature.length();
67 if (length < signature_length)
68 continue;
70 if (LowerCaseEqualsASCII(
71 data.begin(), data.begin() + signature_length, signature.data()))
72 return true;
74 return false;
77 void IncrementHistogramCount(const std::string& name) {
78 // The default value of min, max, bucket_count are copied from histogram.h.
79 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet(
80 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag);
81 histogram_pointer->Add(1);
84 void IncrementHistogramEnum(const std::string& name,
85 uint32 sample,
86 uint32 boundary_value) {
87 // The default value of min, max, bucket_count are copied from histogram.h.
88 base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet(
89 name,
91 boundary_value,
92 boundary_value + 1,
93 base::HistogramBase::kUmaTargetedHistogramFlag);
94 histogram_pointer->Add(sample);
97 void HistogramCountBlockedResponse(
98 const std::string& bucket_prefix,
99 linked_ptr<SiteIsolationResponseMetaData>& resp_data,
100 bool nosniff_block) {
101 std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked");
102 IncrementHistogramCount(bucket_prefix + block_label);
104 // The content is blocked if it is sniffed as HTML/JSON/XML. When
105 // the blocked response is with an error status code, it is not
106 // disruptive for the following reasons : 1) the blocked content is
107 // not a binary object (such as an image) since it is sniffed as
108 // text; 2) then, this blocking only breaks the renderer behavior
109 // only if it is either JavaScript or CSS. However, the renderer
110 // doesn't use the contents of JS/CSS with unaffected status code
111 // (e.g, 404). 3) the renderer is expected not to use the cross-site
112 // document content for purposes other than JS/CSS (e.g, XHR).
113 bool renderable_status_code =
114 IsRenderableStatusCode(resp_data->http_status_code);
116 if (renderable_status_code) {
117 IncrementHistogramEnum(
118 bucket_prefix + block_label + ".RenderableStatusCode",
119 resp_data->resource_type,
120 ResourceType::LAST_TYPE);
121 } else {
122 IncrementHistogramCount(bucket_prefix + block_label +
123 ".NonRenderableStatusCode");
127 void HistogramCountNotBlockedResponse(const std::string& bucket_prefix,
128 bool sniffed_as_js) {
129 IncrementHistogramCount(bucket_prefix + ".NotBlocked");
130 if (sniffed_as_js)
131 IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS");
134 } // namespace
136 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {}
138 void SiteIsolationPolicy::SetPolicyEnabled(bool enabled) {
139 g_policy_enabled = enabled;
142 linked_ptr<SiteIsolationResponseMetaData>
143 SiteIsolationPolicy::OnReceivedResponse(
144 const GURL& frame_origin,
145 const GURL& response_url,
146 ResourceType::Type resource_type,
147 int origin_pid,
148 const webkit_glue::ResourceResponseInfo& info) {
149 if (!g_policy_enabled)
150 return linked_ptr<SiteIsolationResponseMetaData>();
152 // if |origin_pid| is non-zero, it means that this response is for a plugin
153 // spawned from this renderer process. We exclude responses for plugins for
154 // now, but eventually, we're going to make plugin processes directly talk to
155 // the browser process so that we don't apply cross-site document blocking to
156 // them.
157 if (origin_pid)
158 return linked_ptr<SiteIsolationResponseMetaData>();
160 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);
162 // See if this is for navigation. If it is, don't block it, under the
163 // assumption that we will put it in an appropriate process.
164 if (ResourceType::IsFrame(resource_type))
165 return linked_ptr<SiteIsolationResponseMetaData>();
167 if (!IsBlockableScheme(response_url))
168 return linked_ptr<SiteIsolationResponseMetaData>();
170 if (IsSameSite(frame_origin, response_url))
171 return linked_ptr<SiteIsolationResponseMetaData>();
173 SiteIsolationResponseMetaData::CanonicalMimeType canonical_mime_type =
174 GetCanonicalMimeType(info.mime_type);
176 if (canonical_mime_type == SiteIsolationResponseMetaData::Others)
177 return linked_ptr<SiteIsolationResponseMetaData>();
179 // Every CORS request should have the Access-Control-Allow-Origin header even
180 // if it is preceded by a pre-flight request. Therefore, if this is a CORS
181 // request, it has this header. response.httpHeaderField() internally uses
182 // case-insensitive matching for the header name.
183 std::string access_control_origin;
185 // We can use a case-insensitive header name for EnumerateHeader().
186 info.headers->EnumerateHeader(
187 NULL, "access-control-allow-origin", &access_control_origin);
188 if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin))
189 return linked_ptr<SiteIsolationResponseMetaData>();
191 // Real XSD data collection starts from here.
192 std::string no_sniff;
193 info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff);
195 linked_ptr<SiteIsolationResponseMetaData> resp_data(
196 new SiteIsolationResponseMetaData);
197 resp_data->frame_origin = frame_origin.spec();
198 resp_data->response_url = response_url;
199 resp_data->resource_type = resource_type;
200 resp_data->canonical_mime_type = canonical_mime_type;
201 resp_data->http_status_code = info.headers->response_code();
202 resp_data->no_sniff = LowerCaseEqualsASCII(no_sniff, "nosniff");
204 return resp_data;
207 bool SiteIsolationPolicy::ShouldBlockResponse(
208 linked_ptr<SiteIsolationResponseMetaData>& resp_data,
209 const char* raw_data,
210 int raw_length,
211 std::string* alternative_data) {
212 if (!g_policy_enabled)
213 return false;
215 DCHECK(resp_data.get());
217 StringPiece data(raw_data, raw_length);
219 // Record the length of the first received network packet to see if it's
220 // enough for sniffing.
221 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length);
223 // Record the number of cross-site document responses with a specific mime
224 // type (text/html, text/xml, etc).
225 UMA_HISTOGRAM_ENUMERATION(
226 "SiteIsolation.XSD.MimeType",
227 resp_data->canonical_mime_type,
228 SiteIsolationResponseMetaData::MaxCanonicalMimeType);
230 // Store the result of cross-site document blocking analysis.
231 bool is_blocked = false;
232 bool sniffed_as_js = SniffForJS(data);
234 // Record the number of responses whose content is sniffed for what its mime
235 // type claims it to be. For example, we apply a HTML sniffer for a document
236 // tagged with text/html here. Whenever this check becomes true, we'll block
237 // the response.
238 if (resp_data->canonical_mime_type !=
239 SiteIsolationResponseMetaData::Plain) {
240 std::string bucket_prefix;
241 bool sniffed_as_target_document = false;
242 if (resp_data->canonical_mime_type ==
243 SiteIsolationResponseMetaData::HTML) {
244 bucket_prefix = "SiteIsolation.XSD.HTML";
245 sniffed_as_target_document = SniffForHTML(data);
246 } else if (resp_data->canonical_mime_type ==
247 SiteIsolationResponseMetaData::XML) {
248 bucket_prefix = "SiteIsolation.XSD.XML";
249 sniffed_as_target_document = SniffForXML(data);
250 } else if (resp_data->canonical_mime_type ==
251 SiteIsolationResponseMetaData::JSON) {
252 bucket_prefix = "SiteIsolation.XSD.JSON";
253 sniffed_as_target_document = SniffForJSON(data);
254 } else {
255 NOTREACHED() << "Not a blockable mime type: "
256 << resp_data->canonical_mime_type;
259 if (sniffed_as_target_document) {
260 is_blocked = true;
261 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
262 } else {
263 if (resp_data->no_sniff) {
264 is_blocked = true;
265 HistogramCountBlockedResponse(bucket_prefix, resp_data, true);
266 } else {
267 HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js);
270 } else {
271 // This block is for plain text documents. We apply our HTML, XML,
272 // and JSON sniffer to a text document in the order, and block it
273 // if any of them succeeds in sniffing.
274 std::string bucket_prefix;
275 if (SniffForHTML(data))
276 bucket_prefix = "SiteIsolation.XSD.Plain.HTML";
277 else if (SniffForXML(data))
278 bucket_prefix = "SiteIsolation.XSD.Plain.XML";
279 else if (SniffForJSON(data))
280 bucket_prefix = "SiteIsolation.XSD.Plain.JSON";
282 if (bucket_prefix.size() > 0) {
283 is_blocked = true;
284 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
285 } else if (resp_data->no_sniff) {
286 is_blocked = true;
287 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true);
288 } else {
289 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",
290 sniffed_as_js);
294 if (!CommandLine::ForCurrentProcess()->HasSwitch(
295 switches::kBlockCrossSiteDocuments))
296 is_blocked = false;
298 if (is_blocked) {
299 alternative_data->erase();
300 alternative_data->insert(0, " ");
301 LOG(ERROR) << resp_data->response_url
302 << " is blocked as an illegal cross-site document from "
303 << resp_data->frame_origin;
305 return is_blocked;
308 SiteIsolationResponseMetaData::CanonicalMimeType
309 SiteIsolationPolicy::GetCanonicalMimeType(const std::string& mime_type) {
310 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) {
311 return SiteIsolationResponseMetaData::HTML;
314 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) {
315 return SiteIsolationResponseMetaData::Plain;
318 if (LowerCaseEqualsASCII(mime_type, kAppJson) ||
319 LowerCaseEqualsASCII(mime_type, kTextJson) ||
320 LowerCaseEqualsASCII(mime_type, kTextXjson)) {
321 return SiteIsolationResponseMetaData::JSON;
324 if (LowerCaseEqualsASCII(mime_type, kTextXml) ||
325 LowerCaseEqualsASCII(mime_type, xAppRssXml) ||
326 LowerCaseEqualsASCII(mime_type, kAppXml)) {
327 return SiteIsolationResponseMetaData::XML;
330 return SiteIsolationResponseMetaData::Others;
333 bool SiteIsolationPolicy::IsBlockableScheme(const GURL& url) {
334 // We exclude ftp:// from here. FTP doesn't provide a Content-Type
335 // header which our policy depends on, so we cannot protect any
336 // document from FTP servers.
337 return url.SchemeIs("http") || url.SchemeIs("https");
340 bool SiteIsolationPolicy::IsSameSite(const GURL& frame_origin,
341 const GURL& response_url) {
343 if (!frame_origin.is_valid() || !response_url.is_valid())
344 return false;
346 if (frame_origin.scheme() != response_url.scheme())
347 return false;
349 // SameDomainOrHost() extracts the effective domains (public suffix plus one)
350 // from the two URLs and compare them.
351 return net::registry_controlled_domains::SameDomainOrHost(
352 frame_origin,
353 response_url,
354 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
357 // We don't use Webkit's existing CORS policy implementation since
358 // their policy works in terms of origins, not sites. For example,
359 // when frame is sub.a.com and it is not allowed to access a document
360 // with sub1.a.com. But under Site Isolation, it's allowed.
361 bool SiteIsolationPolicy::IsValidCorsHeaderSet(
362 const GURL& frame_origin,
363 const GURL& website_origin,
364 const std::string& access_control_origin) {
365 // Many websites are sending back "\"*\"" instead of "*". This is
366 // non-standard practice, and not supported by Chrome. Refer to
367 // CrossOriginAccessControl::passesAccessControlCheck().
369 // TODO(dsjang): * is not allowed for the response from a request
370 // with cookies. This allows for more than what the renderer will
371 // eventually be able to receive, so we won't see illegal cross-site
372 // documents allowed by this. We have to find a way to see if this
373 // response is from a cookie-tagged request or not in the future.
374 if (access_control_origin == "*")
375 return true;
377 // TODO(dsjang): The CORS spec only treats a fully specified URL, except for
378 // "*", but many websites are using just a domain for access_control_origin,
379 // and this is blocked by Webkit's CORS logic here :
380 // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set
381 // is_valid() to false when it is created from a URL containing * in the
382 // domain part.
384 GURL cors_origin(access_control_origin);
385 return IsSameSite(frame_origin, cors_origin);
388 // This function is a slight modification of |net::SniffForHTML|.
389 bool SiteIsolationPolicy::SniffForHTML(StringPiece data) {
390 // The content sniffer used by Chrome and Firefox are using "<!--"
391 // as one of the HTML signatures, but it also appears in valid
392 // JavaScript, considered as well-formed JS by the browser. Since
393 // we do not want to block any JS, we exclude it from our HTML
394 // signatures. This can weaken our document block policy, but we can
395 // break less websites.
396 // TODO(dsjang): parameterize |net::SniffForHTML| with an option
397 // that decides whether to include <!-- or not, so that we can
398 // remove this function.
399 // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser
400 // process, we should do single-thread checking here for the static
401 // initializer.
402 static const StringPiece kHtmlSignatures[] = {
403 StringPiece("<!DOCTYPE html"), // HTML5 spec
404 StringPiece("<script"), // HTML5 spec, Mozilla
405 StringPiece("<html"), // HTML5 spec, Mozilla
406 StringPiece("<head"), // HTML5 spec, Mozilla
407 StringPiece("<iframe"), // Mozilla
408 StringPiece("<h1"), // Mozilla
409 StringPiece("<div"), // Mozilla
410 StringPiece("<font"), // Mozilla
411 StringPiece("<table"), // Mozilla
412 StringPiece("<a"), // Mozilla
413 StringPiece("<style"), // Mozilla
414 StringPiece("<title"), // Mozilla
415 StringPiece("<b"), // Mozilla
416 StringPiece("<body"), // Mozilla
417 StringPiece("<br"), // Mozilla
418 StringPiece("<p"), // Mozilla
419 StringPiece("<?xml") // Mozilla
422 while (data.length() > 0) {
423 if (MatchesSignature(
424 data, kHtmlSignatures, arraysize(kHtmlSignatures)))
425 return true;
427 // If we cannot find "<!--", we fail sniffing this as HTML.
428 static const StringPiece kCommentBegins[] = { StringPiece("<!--") };
429 if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins)))
430 break;
432 // Search for --> and do SniffForHTML after that. If we can find the
433 // comment's end, we start HTML sniffing from there again.
434 static const char kEndComment[] = "-->";
435 size_t offset = data.find(kEndComment);
436 if (offset == base::StringPiece::npos)
437 break;
439 // Proceed to the index next to the ending comment (-->).
440 data.remove_prefix(offset + strlen(kEndComment));
443 return false;
446 bool SiteIsolationPolicy::SniffForXML(base::StringPiece data) {
447 // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for
448 // this signature. However, XML is case-sensitive. Don't we have to
449 // be more lenient only to block documents starting with the exact
450 // string <?xml rather than <?XML ?
451 // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser
452 // process, we should do single-thread checking here for the static
453 // initializer.
454 static const StringPiece kXmlSignatures[] = { StringPiece("<?xml") };
455 return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures));
458 bool SiteIsolationPolicy::SniffForJSON(base::StringPiece data) {
459 // TODO(dsjang): We have to come up with a better way to sniff
460 // JSON. However, even RE cannot help us that much due to the fact
461 // that we don't do full parsing. This DFA starts with state 0, and
462 // finds {, "/' and : in that order. We're avoiding adding a
463 // dependency on a regular expression library.
464 enum {
465 kStartState,
466 kLeftBraceState,
467 kLeftQuoteState,
468 kColonState,
469 kTerminalState,
470 } state = kStartState;
472 size_t length = data.length();
473 for (size_t i = 0; i < length && state < kColonState; ++i) {
474 const char c = data[i];
475 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
476 continue;
478 switch (state) {
479 case kStartState:
480 if (c == '{')
481 state = kLeftBraceState;
482 else
483 state = kTerminalState;
484 break;
485 case kLeftBraceState:
486 if (c == '\"' || c == '\'')
487 state = kLeftQuoteState;
488 else
489 state = kTerminalState;
490 break;
491 case kLeftQuoteState:
492 if (c == ':')
493 state = kColonState;
494 break;
495 case kColonState:
496 case kTerminalState:
497 NOTREACHED();
498 break;
501 return state == kColonState;
504 bool SiteIsolationPolicy::SniffForJS(StringPiece data) {
505 // TODO(dsjang): This is a real hack. The only purpose of this function is to
506 // try to see if there's any possibility that this data can be JavaScript
507 // (superset of JS). This function will be removed once UMA stats are
508 // gathered.
510 // Search for "var " for JS detection.
511 return data.find("var ") != base::StringPiece::npos;
514 } // namespace content