content/child/multipart_response_delegate.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "content/child/multipart_response_delegate.h"
   6
   7 #include "base/logging.h"
   8 #include "base/strings/string_number_conversions.h"
   9 #include "base/strings/string_util.h"
  10 #include "net/base/net_util.h"
  11 #include "net/http/http_util.h"
  12 #include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h"
  13 #include "third_party/WebKit/public/platform/WebString.h"
  14 #include "third_party/WebKit/public/platform/WebURL.h"
  15 #include "third_party/WebKit/public/platform/WebURLLoaderClient.h"
  16
  17 using blink::WebHTTPHeaderVisitor;
  18 using blink::WebString;
  19 using blink::WebURLLoader;
  20 using blink::WebURLLoaderClient;
  21 using blink::WebURLResponse;
  22
  23 namespace content {
  24
  25 namespace {
  26
  27 // The list of response headers that we do not copy from the original
  28 // response when generating a WebURLResponse for a MIME payload.
  29 const char* kReplaceHeaders[] = {
  30   "content-type",
  31   "content-length",
  32   "content-disposition",
  33   "content-range",
  34   "range",
  35   "set-cookie"
  36 };
  37
  38 class HeaderCopier : public WebHTTPHeaderVisitor {
  39  public:
  40   HeaderCopier(WebURLResponse* response)
  41       : response_(response) {
  42   }
  43   virtual void visitHeader(const WebString& name, const WebString& value) {
  44     const std::string& name_utf8 = name.utf8();
  45     for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
  46       if (LowerCaseEqualsASCII(name_utf8, kReplaceHeaders[i]))
  47         return;
  48     }
  49     response_->setHTTPHeaderField(name, value);
  50   }
  51  private:
  52   WebURLResponse* response_;
  53 };
  54
  55 }  // namespace
  56
  57 MultipartResponseDelegate::MultipartResponseDelegate(
  58     WebURLLoaderClient* client,
  59     WebURLLoader* loader,
  60     const WebURLResponse& response,
  61     const std::string& boundary)
  62     : client_(client),
  63       loader_(loader),
  64       original_response_(response),
  65       encoded_data_length_(0),
  66       boundary_("--"),
  67       first_received_data_(true),
  68       processing_headers_(false),
  69       stop_sending_(false),
  70       has_sent_first_response_(false) {
  71   // Some servers report a boundary prefixed with "--".  See bug 5786.
  72   if (StartsWithASCII(boundary, "--", true)) {
  73     boundary_.assign(boundary);
  74   } else {
  75     boundary_.append(boundary);
  76   }
  77 }
  78
  79 void MultipartResponseDelegate::OnReceivedData(const char* data,
  80                                                int data_len,
  81                                                int encoded_data_length) {
  82   // stop_sending_ means that we've already received the final boundary token.
  83   // The server should stop sending us data at this point, but if it does, we
  84   // just throw it away.
  85   if (stop_sending_)
  86     return;
  87
  88   data_.append(data, data_len);
  89   encoded_data_length_ += encoded_data_length;
  90   if (first_received_data_) {
  91     // Some servers don't send a boundary token before the first chunk of
  92     // data.  We handle this case anyway (Gecko does too).
  93     first_received_data_ = false;
  94
  95     // Eat leading \r\n
  96     int pos = PushOverLine(data_, 0);
  97     if (pos)
  98       data_ = data_.substr(pos);
  99
 100     if (data_.length() < boundary_.length() + 2) {
 101       // We don't have enough data yet to make a boundary token.  Just wait
 102       // until the next chunk of data arrives.
 103       first_received_data_ = true;
 104       return;
 105     }
 106
 107     if (0 != data_.compare(0, boundary_.length(), boundary_)) {
 108       data_ = boundary_ + "\n" + data_;
 109     }
 110   }
 111   DCHECK(!first_received_data_);
 112
 113   // Headers
 114   if (processing_headers_) {
 115     // Eat leading \r\n
 116     int pos = PushOverLine(data_, 0);
 117     if (pos)
 118       data_ = data_.substr(pos);
 119
 120     if (ParseHeaders()) {
 121       // Successfully parsed headers.
 122       processing_headers_ = false;
 123     } else {
 124       // Get more data before trying again.
 125       return;
 126     }
 127   }
 128   DCHECK(!processing_headers_);
 129
 130   size_t boundary_pos;
 131   while ((boundary_pos = FindBoundary()) != std::string::npos) {
 132     if (client_) {
 133       // Strip out trailing \n\r characters in the buffer preceding the
 134       // boundary on the same lines as Firefox.
 135       size_t data_length = boundary_pos;
 136       if (boundary_pos > 0 && data_[boundary_pos - 1] == '\n') {
 137         data_length--;
 138         if (boundary_pos > 1 && data_[boundary_pos - 2] == '\r') {
 139           data_length--;
 140         }
 141       }
 142       if (data_length > 0) {
 143         // Send the last data chunk.
 144         client_->didReceiveData(loader_,
 145                                 data_.data(),
 146                                 static_cast<int>(data_length),
 147                                 encoded_data_length_);
 148         encoded_data_length_ = 0;
 149       }
 150     }
 151     size_t boundary_end_pos = boundary_pos + boundary_.length();
 152     if (boundary_end_pos < data_.length() && '-' == data_[boundary_end_pos]) {
 153       // This was the last boundary so we can stop processing.
 154       stop_sending_ = true;
 155       data_.clear();
 156       return;
 157     }
 158
 159     // We can now throw out data up through the boundary
 160     int offset = PushOverLine(data_, boundary_end_pos);
 161     data_ = data_.substr(boundary_end_pos + offset);
 162
 163     // Ok, back to parsing headers
 164     if (!ParseHeaders()) {
 165       processing_headers_ = true;
 166       break;
 167     }
 168   }
 169
 170   // At this point, we should send over any data we have, but keep enough data
 171   // buffered to handle a boundary that may have been truncated.
 172   if (!processing_headers_ && data_.length() > boundary_.length()) {
 173     // If the last character is a new line character, go ahead and just send
 174     // everything we have buffered.  This matches an optimization in Gecko.
 175     int send_length = data_.length() - boundary_.length();
 176     if (data_[data_.length() - 1] == '\n')
 177       send_length = data_.length();
 178     if (client_)
 179       client_->didReceiveData(loader_,
 180                               data_.data(),
 181                               send_length,
 182                               encoded_data_length_);
 183     data_ = data_.substr(send_length);
 184     encoded_data_length_ = 0;
 185   }
 186 }
 187
 188 void MultipartResponseDelegate::OnCompletedRequest() {
 189   // If we have any pending data and we're not in a header, go ahead and send
 190   // it to WebCore.
 191   if (!processing_headers_ && !data_.empty() && !stop_sending_ && client_) {
 192     client_->didReceiveData(loader_,
 193                             data_.data(),
 194                             static_cast<int>(data_.length()),
 195                             encoded_data_length_);
 196     encoded_data_length_ = 0;
 197   }
 198 }
 199
 200 int MultipartResponseDelegate::PushOverLine(const std::string& data,
 201                                             size_t pos) {
 202   int offset = 0;
 203   if (pos < data.length() && (data[pos] == '\r' || data[pos] == '\n')) {
 204     ++offset;
 205     if (pos + 1 < data.length() && data[pos + 1] == '\n')
 206       ++offset;
 207   }
 208   return offset;
 209 }
 210
 211 bool MultipartResponseDelegate::ParseHeaders() {
 212   int line_feed_increment = 1;
 213
 214   // Grab the headers being liberal about line endings.
 215   size_t line_start_pos = 0;
 216   size_t line_end_pos = data_.find('\n');
 217   while (line_end_pos != std::string::npos) {
 218     // Handle CRLF
 219     if (line_end_pos > line_start_pos && data_[line_end_pos - 1] == '\r') {
 220       line_feed_increment = 2;
 221       --line_end_pos;
 222     } else {
 223       line_feed_increment = 1;
 224     }
 225     if (line_start_pos == line_end_pos) {
 226       // A blank line, end of headers
 227       line_end_pos += line_feed_increment;
 228       break;
 229     }
 230     // Find the next header line.
 231     line_start_pos = line_end_pos + line_feed_increment;
 232     line_end_pos = data_.find('\n', line_start_pos);
 233   }
 234   // Truncated in the middle of a header, stop parsing.
 235   if (line_end_pos == std::string::npos)
 236     return false;
 237
 238   // Eat headers
 239   std::string headers("\n");
 240   headers.append(data_, 0, line_end_pos);
 241   data_ = data_.substr(line_end_pos);
 242
 243   // Create a WebURLResponse based on the original set of headers + the
 244   // replacement headers.  We only replace the same few headers that gecko
 245   // does.  See netwerk/streamconv/converters/nsMultiMixedConv.cpp.
 246   std::string content_type = net::GetSpecificHeader(headers, "content-type");
 247   std::string mime_type;
 248   std::string charset;
 249   bool has_charset = false;
 250   net::HttpUtil::ParseContentType(content_type, &mime_type, &charset,
 251                                   &has_charset, NULL);
 252   WebURLResponse response(original_response_.url());
 253   response.setMIMEType(WebString::fromUTF8(mime_type));
 254   response.setTextEncodingName(WebString::fromUTF8(charset));
 255
 256   HeaderCopier copier(&response);
 257   original_response_.visitHTTPHeaderFields(&copier);
 258
 259   for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
 260     std::string name(kReplaceHeaders[i]);
 261     std::string value = net::GetSpecificHeader(headers, name);
 262     if (!value.empty()) {
 263       response.setHTTPHeaderField(WebString::fromUTF8(name),
 264                                   WebString::fromUTF8(value));
 265     }
 266   }
 267   // To avoid recording every multipart load as a separate visit in
 268   // the history database, we want to keep track of whether the response
 269   // is part of a multipart payload.  We do want to record the first visit,
 270   // so we only set isMultipartPayload to true after the first visit.
 271   response.setIsMultipartPayload(has_sent_first_response_);
 272   has_sent_first_response_ = true;
 273   // Send the response!
 274   if (client_)
 275     client_->didReceiveResponse(loader_, response);
 276
 277   return true;
 278 }
 279
 280 // Boundaries are supposed to be preceeded with --, but it looks like gecko
 281 // doesn't require the dashes to exist.  See nsMultiMixedConv::FindToken.
 282 size_t MultipartResponseDelegate::FindBoundary() {
 283   size_t boundary_pos = data_.find(boundary_);
 284   if (boundary_pos != std::string::npos) {
 285     // Back up over -- for backwards compat
 286     // TODO(tc): Don't we only want to do this once?  Gecko code doesn't seem
 287     // to care.
 288     if (boundary_pos >= 2) {
 289       if ('-' == data_[boundary_pos - 1] && '-' == data_[boundary_pos - 2]) {
 290         boundary_pos -= 2;
 291         boundary_ = "--" + boundary_;
 292       }
 293     }
 294   }
 295   return boundary_pos;
 296 }
 297
 298 bool MultipartResponseDelegate::ReadMultipartBoundary(
 299     const WebURLResponse& response,
 300     std::string* multipart_boundary) {
 301   std::string content_type =
 302       response.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8();
 303
 304   size_t boundary_start_offset = content_type.find("boundary=");
 305   if (boundary_start_offset == std::string::npos)
 306     return false;
 307
 308   boundary_start_offset += strlen("boundary=");
 309
 310   size_t boundary_end_offset = content_type.find(';', boundary_start_offset);
 311
 312   if (boundary_end_offset == std::string::npos)
 313     boundary_end_offset = content_type.length();
 314
 315   size_t boundary_length = boundary_end_offset - boundary_start_offset;
 316
 317   *multipart_boundary =
 318       content_type.substr(boundary_start_offset, boundary_length);
 319   // The byte range response can have quoted boundary strings. This is legal
 320   // as per MIME specifications. Individual data fragements however don't
 321   // contain quoted boundary strings.
 322   base::TrimString(*multipart_boundary, "\"", multipart_boundary);
 323   return true;
 324 }
 325
 326 bool MultipartResponseDelegate::ReadContentRanges(
 327     const WebURLResponse& response,
 328     int64* content_range_lower_bound,
 329     int64* content_range_upper_bound,
 330     int64* content_range_instance_size) {
 331
 332   std::string content_range = response.httpHeaderField("Content-Range").utf8();
 333   if (content_range.empty()) {
 334     content_range = response.httpHeaderField("Range").utf8();
 335   }
 336
 337   if (content_range.empty()) {
 338     DLOG(WARNING) << "Failed to read content range from response.";
 339     return false;
 340   }
 341
 342   size_t byte_range_lower_bound_start_offset = content_range.find(" ");
 343   if (byte_range_lower_bound_start_offset == std::string::npos) {
 344     return false;
 345   }
 346
 347   // Skip over the initial space.
 348   byte_range_lower_bound_start_offset++;
 349
 350   // Find the lower bound.
 351   size_t byte_range_lower_bound_end_offset =
 352       content_range.find("-", byte_range_lower_bound_start_offset);
 353   if (byte_range_lower_bound_end_offset == std::string::npos) {
 354     return false;
 355   }
 356
 357   size_t byte_range_lower_bound_characters =
 358       byte_range_lower_bound_end_offset - byte_range_lower_bound_start_offset;
 359   std::string byte_range_lower_bound =
 360       content_range.substr(byte_range_lower_bound_start_offset,
 361                            byte_range_lower_bound_characters);
 362
 363   // Find the upper bound.
 364   size_t byte_range_upper_bound_start_offset =
 365       byte_range_lower_bound_end_offset + 1;
 366
 367   size_t byte_range_upper_bound_end_offset =
 368       content_range.find("/", byte_range_upper_bound_start_offset);
 369   if (byte_range_upper_bound_end_offset == std::string::npos) {
 370     return false;
 371   }
 372
 373   size_t byte_range_upper_bound_characters =
 374       byte_range_upper_bound_end_offset - byte_range_upper_bound_start_offset;
 375   std::string byte_range_upper_bound =
 376       content_range.substr(byte_range_upper_bound_start_offset,
 377                            byte_range_upper_bound_characters);
 378
 379   // Find the instance size.
 380   size_t byte_range_instance_size_start_offset =
 381       byte_range_upper_bound_end_offset + 1;
 382
 383   size_t byte_range_instance_size_end_offset =
 384       content_range.length();
 385
 386   size_t byte_range_instance_size_characters =
 387       byte_range_instance_size_end_offset -
 388       byte_range_instance_size_start_offset;
 389   std::string byte_range_instance_size =
 390       content_range.substr(byte_range_instance_size_start_offset,
 391                            byte_range_instance_size_characters);
 392
 393   if (!base::StringToInt64(byte_range_lower_bound, content_range_lower_bound))
 394     return false;
 395   if (!base::StringToInt64(byte_range_upper_bound, content_range_upper_bound))
 396     return false;
 397   if (!base::StringToInt64(byte_range_instance_size,
 398                            content_range_instance_size)) {
 399     return false;
 400   }
 401   return true;
 402 }
 403
 404 }  // namespace content