content/child/site_isolation_stats_gatherer.cc

   1 // Copyright 2015 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "content/child/site_isolation_stats_gatherer.h"
   6
   7 #include "base/metrics/histogram.h"
   8 #include "base/strings/string_piece.h"
   9 #include "base/strings/string_util.h"
  10 #include "content/public/common/resource_response_info.h"
  11 #include "net/http/http_response_headers.h"
  12
  13 namespace content {
  14
  15 namespace {
  16
  17 // The gathering of UMA stats for site isolation is deactivated by default, and
  18 // only activated in renderer processes.
  19 static bool g_stats_gathering_enabled = false;
  20
  21 bool IsRenderableStatusCode(int status_code) {
  22   // Chrome only uses the content of a response with one of these status codes
  23   // for CSS/JavaScript. For images, Chrome just ignores status code.
  24   const int renderable_status_code[] = {
  25       200, 201, 202, 203, 206, 300, 301, 302, 303, 305, 306, 307};
  26   for (size_t i = 0; i < arraysize(renderable_status_code); ++i) {
  27     if (renderable_status_code[i] == status_code)
  28       return true;
  29   }
  30   return false;
  31 }
  32
  33 void IncrementHistogramCount(const std::string& name) {
  34   // The default value of min, max, bucket_count are copied from histogram.h.
  35   base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet(
  36       name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag);
  37   histogram_pointer->Add(1);
  38 }
  39
  40 void IncrementHistogramEnum(const std::string& name,
  41                             uint32 sample,
  42                             uint32 boundary_value) {
  43   // The default value of min, max, bucket_count are copied from histogram.h.
  44   base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet(
  45       name, 1, boundary_value, boundary_value + 1,
  46       base::HistogramBase::kUmaTargetedHistogramFlag);
  47   histogram_pointer->Add(sample);
  48 }
  49
  50 void HistogramCountBlockedResponse(
  51     const std::string& bucket_prefix,
  52     const linked_ptr<SiteIsolationResponseMetaData>& resp_data,
  53     bool nosniff_block) {
  54   std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked");
  55   IncrementHistogramCount(bucket_prefix + block_label);
  56
  57   // The content is blocked if it is sniffed as HTML/JSON/XML. When
  58   // the blocked response is with an error status code, it is not
  59   // disruptive for the following reasons : 1) the blocked content is
  60   // not a binary object (such as an image) since it is sniffed as
  61   // text; 2) then, this blocking only breaks the renderer behavior
  62   // only if it is either JavaScript or CSS. However, the renderer
  63   // doesn't use the contents of JS/CSS with unaffected status code
  64   // (e.g, 404). 3) the renderer is expected not to use the cross-site
  65   // document content for purposes other than JS/CSS (e.g, XHR).
  66   bool renderable_status_code =
  67       IsRenderableStatusCode(resp_data->http_status_code);
  68
  69   if (renderable_status_code) {
  70     IncrementHistogramEnum(
  71         bucket_prefix + block_label + ".RenderableStatusCode",
  72         resp_data->resource_type, RESOURCE_TYPE_LAST_TYPE);
  73   } else {
  74     IncrementHistogramCount(bucket_prefix + block_label +
  75                             ".NonRenderableStatusCode");
  76   }
  77 }
  78
  79 void HistogramCountNotBlockedResponse(const std::string& bucket_prefix,
  80                                       bool sniffed_as_js) {
  81   IncrementHistogramCount(bucket_prefix + ".NotBlocked");
  82   if (sniffed_as_js)
  83     IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS");
  84 }
  85
  86 }  // namespace
  87
  88 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {
  89 }
  90
  91 void SiteIsolationStatsGatherer::SetEnabled(bool enabled) {
  92   g_stats_gathering_enabled = enabled;
  93 }
  94
  95 linked_ptr<SiteIsolationResponseMetaData>
  96 SiteIsolationStatsGatherer::OnReceivedResponse(
  97     const GURL& frame_origin,
  98     const GURL& response_url,
  99     ResourceType resource_type,
 100     int origin_pid,
 101     const ResourceResponseInfo& info) {
 102   if (!g_stats_gathering_enabled)
 103     return linked_ptr<SiteIsolationResponseMetaData>();
 104
 105   // if |origin_pid| is non-zero, it means that this response is for a plugin
 106   // spawned from this renderer process. We exclude responses for plugins for
 107   // now, but eventually, we're going to make plugin processes directly talk to
 108   // the browser process so that we don't apply cross-site document blocking to
 109   // them.
 110   if (origin_pid)
 111     return linked_ptr<SiteIsolationResponseMetaData>();
 112
 113   UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);
 114
 115   // See if this is for navigation. If it is, don't block it, under the
 116   // assumption that we will put it in an appropriate process.
 117   if (IsResourceTypeFrame(resource_type))
 118     return linked_ptr<SiteIsolationResponseMetaData>();
 119
 120   if (!CrossSiteDocumentClassifier::IsBlockableScheme(response_url))
 121     return linked_ptr<SiteIsolationResponseMetaData>();
 122
 123   if (CrossSiteDocumentClassifier::IsSameSite(frame_origin, response_url))
 124     return linked_ptr<SiteIsolationResponseMetaData>();
 125
 126   CrossSiteDocumentMimeType canonical_mime_type =
 127       CrossSiteDocumentClassifier::GetCanonicalMimeType(info.mime_type);
 128
 129   if (canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS)
 130     return linked_ptr<SiteIsolationResponseMetaData>();
 131
 132   // Every CORS request should have the Access-Control-Allow-Origin header even
 133   // if it is preceded by a pre-flight request. Therefore, if this is a CORS
 134   // request, it has this header.  response.httpHeaderField() internally uses
 135   // case-insensitive matching for the header name.
 136   std::string access_control_origin;
 137
 138   // We can use a case-insensitive header name for EnumerateHeader().
 139   info.headers->EnumerateHeader(NULL, "access-control-allow-origin",
 140                                 &access_control_origin);
 141   if (CrossSiteDocumentClassifier::IsValidCorsHeaderSet(
 142           frame_origin, response_url, access_control_origin))
 143     return linked_ptr<SiteIsolationResponseMetaData>();
 144
 145   // Real XSD data collection starts from here.
 146   std::string no_sniff;
 147   info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff);
 148
 149   linked_ptr<SiteIsolationResponseMetaData> resp_data(
 150       new SiteIsolationResponseMetaData);
 151   resp_data->frame_origin = frame_origin.spec();
 152   resp_data->response_url = response_url;
 153   resp_data->resource_type = resource_type;
 154   resp_data->canonical_mime_type = canonical_mime_type;
 155   resp_data->http_status_code = info.headers->response_code();
 156   resp_data->no_sniff = base::LowerCaseEqualsASCII(no_sniff, "nosniff");
 157
 158   return resp_data;
 159 }
 160
 161 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(
 162     const linked_ptr<SiteIsolationResponseMetaData>& resp_data,
 163     const char* raw_data,
 164     int raw_length) {
 165   if (!g_stats_gathering_enabled)
 166     return false;
 167
 168   DCHECK(resp_data.get());
 169
 170   base::StringPiece data(raw_data, raw_length);
 171
 172   // Record the length of the first received chunk of data to see if it's enough
 173   // for sniffing.
 174   UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length);
 175
 176   // Record the number of cross-site document responses with a specific mime
 177   // type (text/html, text/xml, etc).
 178   UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType",
 179                             resp_data->canonical_mime_type,
 180                             CROSS_SITE_DOCUMENT_MIME_TYPE_MAX);
 181
 182   // Store the result of cross-site document blocking analysis.
 183   bool would_block = false;
 184   bool sniffed_as_js = SniffForJS(data);
 185
 186   // Record the number of responses whose content is sniffed for what its mime
 187   // type claims it to be. For example, we apply a HTML sniffer for a document
 188   // tagged with text/html here. Whenever this check becomes true, we'll block
 189   // the response.
 190   if (resp_data->canonical_mime_type != CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN) {
 191     std::string bucket_prefix;
 192     bool sniffed_as_target_document = false;
 193     if (resp_data->canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_HTML) {
 194       bucket_prefix = "SiteIsolation.XSD.HTML";
 195       sniffed_as_target_document =
 196           CrossSiteDocumentClassifier::SniffForHTML(data);
 197     } else if (resp_data->canonical_mime_type ==
 198                CROSS_SITE_DOCUMENT_MIME_TYPE_XML) {
 199       bucket_prefix = "SiteIsolation.XSD.XML";
 200       sniffed_as_target_document =
 201           CrossSiteDocumentClassifier::SniffForXML(data);
 202     } else if (resp_data->canonical_mime_type ==
 203                CROSS_SITE_DOCUMENT_MIME_TYPE_JSON) {
 204       bucket_prefix = "SiteIsolation.XSD.JSON";
 205       sniffed_as_target_document =
 206           CrossSiteDocumentClassifier::SniffForJSON(data);
 207     } else {
 208       NOTREACHED() << "Not a blockable mime type: "
 209                    << resp_data->canonical_mime_type;
 210     }
 211
 212     if (sniffed_as_target_document) {
 213       would_block = true;
 214       HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
 215     } else {
 216       if (resp_data->no_sniff) {
 217         would_block = true;
 218         HistogramCountBlockedResponse(bucket_prefix, resp_data, true);
 219       } else {
 220         HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js);
 221       }
 222     }
 223   } else {
 224     // This block is for plain text documents. We apply our HTML, XML,
 225     // and JSON sniffer to a text document in the order, and block it
 226     // if any of them succeeds in sniffing.
 227     std::string bucket_prefix;
 228     if (CrossSiteDocumentClassifier::SniffForHTML(data))
 229       bucket_prefix = "SiteIsolation.XSD.Plain.HTML";
 230     else if (CrossSiteDocumentClassifier::SniffForXML(data))
 231       bucket_prefix = "SiteIsolation.XSD.Plain.XML";
 232     else if (CrossSiteDocumentClassifier::SniffForJSON(data))
 233       bucket_prefix = "SiteIsolation.XSD.Plain.JSON";
 234
 235     if (bucket_prefix.size() > 0) {
 236       would_block = true;
 237       HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
 238     } else if (resp_data->no_sniff) {
 239       would_block = true;
 240       HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true);
 241     } else {
 242       HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",
 243                                        sniffed_as_js);
 244     }
 245   }
 246
 247   return would_block;
 248 }
 249
 250 bool SiteIsolationStatsGatherer::SniffForJS(base::StringPiece data) {
 251   // The purpose of this function is to try to see if there's any possibility
 252   // that this data can be JavaScript (superset of JS). Search for "var " for JS
 253   // detection. This is a real hack and should only be used for stats gathering.
 254   return data.find("var ") != base::StringPiece::npos;
 255 }
 256
 257 }  // namespace content