content/child/multipart_response_delegate.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "content/child/multipart_response_delegate.h"
   6
   7 #include "base/logging.h"
   8 #include "base/memory/ref_counted.h"
   9 #include "base/strings/string_number_conversions.h"
  10 #include "base/strings/string_util.h"
  11 #include "net/base/net_util.h"
  12 #include "net/http/http_response_headers.h"
  13 #include "net/http/http_util.h"
  14 #include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h"
  15 #include "third_party/WebKit/public/platform/WebString.h"
  16 #include "third_party/WebKit/public/platform/WebURL.h"
  17 #include "third_party/WebKit/public/platform/WebURLLoaderClient.h"
  18
  19 using blink::WebHTTPHeaderVisitor;
  20 using blink::WebString;
  21 using blink::WebURLLoader;
  22 using blink::WebURLLoaderClient;
  23 using blink::WebURLResponse;
  24
  25 namespace content {
  26
  27 namespace {
  28
  29 // The list of response headers that we do not copy from the original
  30 // response when generating a WebURLResponse for a MIME payload.
  31 const char* kReplaceHeaders[] = {
  32   "content-type",
  33   "content-length",
  34   "content-disposition",
  35   "content-range",
  36   "range",
  37   "set-cookie"
  38 };
  39
  40 class HeaderCopier : public WebHTTPHeaderVisitor {
  41  public:
  42   HeaderCopier(WebURLResponse* response)
  43       : response_(response) {
  44   }
  45   virtual void visitHeader(const WebString& name, const WebString& value) {
  46     const std::string& name_utf8 = name.utf8();
  47     for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
  48       if (base::LowerCaseEqualsASCII(name_utf8, kReplaceHeaders[i]))
  49         return;
  50     }
  51     response_->setHTTPHeaderField(name, value);
  52   }
  53  private:
  54   WebURLResponse* response_;
  55 };
  56
  57 }  // namespace
  58
  59 MultipartResponseDelegate::MultipartResponseDelegate(
  60     WebURLLoaderClient* client,
  61     WebURLLoader* loader,
  62     const WebURLResponse& response,
  63     const std::string& boundary)
  64     : client_(client),
  65       loader_(loader),
  66       original_response_(response),
  67       encoded_data_length_(0),
  68       boundary_("--"),
  69       first_received_data_(true),
  70       processing_headers_(false),
  71       stop_sending_(false),
  72       has_sent_first_response_(false) {
  73   // Some servers report a boundary prefixed with "--".  See bug 5786.
  74   if (base::StartsWithASCII(boundary, "--", true)) {
  75     boundary_.assign(boundary);
  76   } else {
  77     boundary_.append(boundary);
  78   }
  79 }
  80
  81 void MultipartResponseDelegate::OnReceivedData(const char* data,
  82                                                int data_len,
  83                                                int encoded_data_length) {
  84   // stop_sending_ means that we've already received the final boundary token.
  85   // The server should stop sending us data at this point, but if it does, we
  86   // just throw it away.
  87   if (stop_sending_)
  88     return;
  89
  90   data_.append(data, data_len);
  91   encoded_data_length_ += encoded_data_length;
  92   if (first_received_data_) {
  93     // Some servers don't send a boundary token before the first chunk of
  94     // data.  We handle this case anyway (Gecko does too).
  95     first_received_data_ = false;
  96
  97     // Eat leading \r\n
  98     int pos = PushOverLine(data_, 0);
  99     if (pos)
 100       data_ = data_.substr(pos);
 101
 102     if (data_.length() < boundary_.length() + 2) {
 103       // We don't have enough data yet to make a boundary token.  Just wait
 104       // until the next chunk of data arrives.
 105       first_received_data_ = true;
 106       return;
 107     }
 108
 109     if (0 != data_.compare(0, boundary_.length(), boundary_)) {
 110       data_ = boundary_ + "\n" + data_;
 111     }
 112   }
 113   DCHECK(!first_received_data_);
 114
 115   // Headers
 116   if (processing_headers_) {
 117     // Eat leading \r\n
 118     int pos = PushOverLine(data_, 0);
 119     if (pos)
 120       data_ = data_.substr(pos);
 121
 122     if (ParseHeaders()) {
 123       // Successfully parsed headers.
 124       processing_headers_ = false;
 125     } else {
 126       // Get more data before trying again.
 127       return;
 128     }
 129   }
 130   DCHECK(!processing_headers_);
 131
 132   size_t boundary_pos;
 133   while ((boundary_pos = FindBoundary()) != std::string::npos) {
 134     if (client_) {
 135       // Strip out trailing \n\r characters in the buffer preceding the
 136       // boundary on the same lines as Firefox.
 137       size_t data_length = boundary_pos;
 138       if (boundary_pos > 0 && data_[boundary_pos - 1] == '\n') {
 139         data_length--;
 140         if (boundary_pos > 1 && data_[boundary_pos - 2] == '\r') {
 141           data_length--;
 142         }
 143       }
 144       if (data_length > 0) {
 145         // Send the last data chunk.
 146         client_->didReceiveData(loader_,
 147                                 data_.data(),
 148                                 static_cast<int>(data_length),
 149                                 encoded_data_length_);
 150         encoded_data_length_ = 0;
 151       }
 152     }
 153     size_t boundary_end_pos = boundary_pos + boundary_.length();
 154     if (boundary_end_pos < data_.length() && '-' == data_[boundary_end_pos]) {
 155       // This was the last boundary so we can stop processing.
 156       stop_sending_ = true;
 157       data_.clear();
 158       return;
 159     }
 160
 161     // We can now throw out data up through the boundary
 162     int offset = PushOverLine(data_, boundary_end_pos);
 163     data_ = data_.substr(boundary_end_pos + offset);
 164
 165     // Ok, back to parsing headers
 166     if (!ParseHeaders()) {
 167       processing_headers_ = true;
 168       break;
 169     }
 170   }
 171
 172   // At this point, we should send over any data we have, but keep enough data
 173   // buffered to handle a boundary that may have been truncated.
 174   if (!processing_headers_ && data_.length() > boundary_.length()) {
 175     // If the last character is a new line character, go ahead and just send
 176     // everything we have buffered.  This matches an optimization in Gecko.
 177     int send_length = data_.length() - boundary_.length();
 178     if (data_[data_.length() - 1] == '\n')
 179       send_length = data_.length();
 180     if (client_)
 181       client_->didReceiveData(loader_,
 182                               data_.data(),
 183                               send_length,
 184                               encoded_data_length_);
 185     data_ = data_.substr(send_length);
 186     encoded_data_length_ = 0;
 187   }
 188 }
 189
 190 void MultipartResponseDelegate::OnCompletedRequest() {
 191   // If we have any pending data and we're not in a header, go ahead and send
 192   // it to WebCore.
 193   if (!processing_headers_ && !data_.empty() && !stop_sending_ && client_) {
 194     client_->didReceiveData(loader_,
 195                             data_.data(),
 196                             static_cast<int>(data_.length()),
 197                             encoded_data_length_);
 198     encoded_data_length_ = 0;
 199   }
 200 }
 201
 202 int MultipartResponseDelegate::PushOverLine(const std::string& data,
 203                                             size_t pos) {
 204   int offset = 0;
 205   if (pos < data.length() && (data[pos] == '\r' || data[pos] == '\n')) {
 206     ++offset;
 207     if (pos + 1 < data.length() && data[pos + 1] == '\n')
 208       ++offset;
 209   }
 210   return offset;
 211 }
 212
 213 bool MultipartResponseDelegate::ParseHeaders() {
 214   int headers_end_pos = net::HttpUtil::LocateEndOfAdditionalHeaders(
 215       data_.c_str(), data_.size(), 0);
 216
 217   if (headers_end_pos < 0)
 218     return false;
 219
 220   // Eat headers and prepend a status line as is required by
 221   // HttpResponseHeaders.
 222   std::string headers("HTTP/1.1 200 OK\r\n");
 223   headers.append(data_, 0, headers_end_pos);
 224   data_ = data_.substr(headers_end_pos);
 225
 226   scoped_refptr<net::HttpResponseHeaders> response_headers =
 227       new net::HttpResponseHeaders(
 228           net::HttpUtil::AssembleRawHeaders(headers.c_str(), headers.size()));
 229
 230   // Create a WebURLResponse based on the original set of headers + the
 231   // replacement headers. We only replace the same few headers that gecko
 232   // does. See netwerk/streamconv/converters/nsMultiMixedConv.cpp.
 233   WebURLResponse response(original_response_.url());
 234
 235   std::string mime_type;
 236   response_headers->GetMimeType(&mime_type);
 237   response.setMIMEType(WebString::fromUTF8(mime_type));
 238
 239   std::string charset;
 240   response_headers->GetCharset(&charset);
 241   response.setTextEncodingName(WebString::fromUTF8(charset));
 242
 243   // Copy the response headers from the original response.
 244   HeaderCopier copier(&response);
 245   original_response_.visitHTTPHeaderFields(&copier);
 246
 247   // Replace original headers with multipart headers listed in kReplaceHeaders.
 248   for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
 249     std::string name(kReplaceHeaders[i]);
 250     std::string value;
 251     void* iterator = nullptr;
 252     while (response_headers->EnumerateHeader(&iterator, name, &value)) {
 253       response.addHTTPHeaderField(WebString::fromLatin1(name),
 254                                   WebString::fromLatin1(value));
 255     }
 256   }
 257   // To avoid recording every multipart load as a separate visit in
 258   // the history database, we want to keep track of whether the response
 259   // is part of a multipart payload.  We do want to record the first visit,
 260   // so we only set isMultipartPayload to true after the first visit.
 261   response.setIsMultipartPayload(has_sent_first_response_);
 262   has_sent_first_response_ = true;
 263   // Send the response!
 264   if (client_)
 265     client_->didReceiveResponse(loader_, response);
 266
 267   return true;
 268 }
 269
 270 // Boundaries are supposed to be preceeded with --, but it looks like gecko
 271 // doesn't require the dashes to exist.  See nsMultiMixedConv::FindToken.
 272 size_t MultipartResponseDelegate::FindBoundary() {
 273   size_t boundary_pos = data_.find(boundary_);
 274   if (boundary_pos != std::string::npos) {
 275     // Back up over -- for backwards compat
 276     // TODO(tc): Don't we only want to do this once?  Gecko code doesn't seem
 277     // to care.
 278     if (boundary_pos >= 2) {
 279       if ('-' == data_[boundary_pos - 1] && '-' == data_[boundary_pos - 2]) {
 280         boundary_pos -= 2;
 281         boundary_ = "--" + boundary_;
 282       }
 283     }
 284   }
 285   return boundary_pos;
 286 }
 287
 288 bool MultipartResponseDelegate::ReadMultipartBoundary(
 289     const WebURLResponse& response,
 290     std::string* multipart_boundary) {
 291   std::string content_type =
 292       response.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8();
 293
 294   size_t boundary_start_offset = content_type.find("boundary=");
 295   if (boundary_start_offset == std::string::npos)
 296     return false;
 297
 298   boundary_start_offset += strlen("boundary=");
 299
 300   size_t boundary_end_offset = content_type.find(';', boundary_start_offset);
 301
 302   if (boundary_end_offset == std::string::npos)
 303     boundary_end_offset = content_type.length();
 304
 305   size_t boundary_length = boundary_end_offset - boundary_start_offset;
 306
 307   *multipart_boundary =
 308       content_type.substr(boundary_start_offset, boundary_length);
 309   // The byte range response can have quoted boundary strings. This is legal
 310   // as per MIME specifications. Individual data fragements however don't
 311   // contain quoted boundary strings.
 312   base::TrimString(*multipart_boundary, "\"", multipart_boundary);
 313   return true;
 314 }
 315
 316 bool MultipartResponseDelegate::ReadContentRanges(
 317     const WebURLResponse& response,
 318     int64* content_range_lower_bound,
 319     int64* content_range_upper_bound,
 320     int64* content_range_instance_size) {
 321
 322   std::string content_range = response.httpHeaderField("Content-Range").utf8();
 323   if (content_range.empty()) {
 324     content_range = response.httpHeaderField("Range").utf8();
 325   }
 326
 327   if (content_range.empty()) {
 328     DLOG(WARNING) << "Failed to read content range from response.";
 329     return false;
 330   }
 331
 332   size_t byte_range_lower_bound_start_offset = content_range.find(" ");
 333   if (byte_range_lower_bound_start_offset == std::string::npos) {
 334     return false;
 335   }
 336
 337   // Skip over the initial space.
 338   byte_range_lower_bound_start_offset++;
 339
 340   // Find the lower bound.
 341   size_t byte_range_lower_bound_end_offset =
 342       content_range.find("-", byte_range_lower_bound_start_offset);
 343   if (byte_range_lower_bound_end_offset == std::string::npos) {
 344     return false;
 345   }
 346
 347   size_t byte_range_lower_bound_characters =
 348       byte_range_lower_bound_end_offset - byte_range_lower_bound_start_offset;
 349   std::string byte_range_lower_bound =
 350       content_range.substr(byte_range_lower_bound_start_offset,
 351                            byte_range_lower_bound_characters);
 352
 353   // Find the upper bound.
 354   size_t byte_range_upper_bound_start_offset =
 355       byte_range_lower_bound_end_offset + 1;
 356
 357   size_t byte_range_upper_bound_end_offset =
 358       content_range.find("/", byte_range_upper_bound_start_offset);
 359   if (byte_range_upper_bound_end_offset == std::string::npos) {
 360     return false;
 361   }
 362
 363   size_t byte_range_upper_bound_characters =
 364       byte_range_upper_bound_end_offset - byte_range_upper_bound_start_offset;
 365   std::string byte_range_upper_bound =
 366       content_range.substr(byte_range_upper_bound_start_offset,
 367                            byte_range_upper_bound_characters);
 368
 369   // Find the instance size.
 370   size_t byte_range_instance_size_start_offset =
 371       byte_range_upper_bound_end_offset + 1;
 372
 373   size_t byte_range_instance_size_end_offset =
 374       content_range.length();
 375
 376   size_t byte_range_instance_size_characters =
 377       byte_range_instance_size_end_offset -
 378       byte_range_instance_size_start_offset;
 379   std::string byte_range_instance_size =
 380       content_range.substr(byte_range_instance_size_start_offset,
 381                            byte_range_instance_size_characters);
 382
 383   if (!base::StringToInt64(byte_range_lower_bound, content_range_lower_bound))
 384     return false;
 385   if (!base::StringToInt64(byte_range_upper_bound, content_range_upper_bound))
 386     return false;
 387   if (!base::StringToInt64(byte_range_instance_size,
 388                            content_range_instance_size)) {
 389     return false;
 390   }
 391   return true;
 392 }
 393
 394 }  // namespace content