1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/child/multipart_response_delegate.h"
7 #include "base/logging.h"
8 #include "base/memory/ref_counted.h"
9 #include "base/strings/string_number_conversions.h"
10 #include "base/strings/string_util.h"
11 #include "net/base/net_util.h"
12 #include "net/http/http_response_headers.h"
13 #include "net/http/http_util.h"
14 #include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h"
15 #include "third_party/WebKit/public/platform/WebString.h"
16 #include "third_party/WebKit/public/platform/WebURL.h"
17 #include "third_party/WebKit/public/platform/WebURLLoaderClient.h"
19 using blink::WebHTTPHeaderVisitor
;
20 using blink::WebString
;
21 using blink::WebURLLoader
;
22 using blink::WebURLLoaderClient
;
23 using blink::WebURLResponse
;
29 // The list of response headers that we do not copy from the original
30 // response when generating a WebURLResponse for a MIME payload.
// HeaderCopier::visitHeader compares visited header names against these
// entries with base::LowerCaseEqualsASCII, and ParseHeaders() re-adds each
// listed header from the part's own header block.
// NOTE(review): only one entry is visible in this listing; the remaining
// entries and the closing brace are not shown here.
31 const char* kReplaceHeaders
[] = {
34 "content-disposition",
// WebHTTPHeaderVisitor that writes visited header fields into a target
// WebURLResponse, checking each header name against kReplaceHeaders first.
40 class HeaderCopier
: public WebHTTPHeaderVisitor
{
// Keeps the raw |response| pointer; visitHeader() writes through it.
42 HeaderCopier(WebURLResponse
* response
)
43 : response_(response
) {
// Called once per header field of the visited response.
45 virtual void visitHeader(const WebString
& name
, const WebString
& value
) {
46 const std::string
& name_utf8
= name
.utf8();
// Compare the header name against every kReplaceHeaders entry.
47 for (size_t i
= 0; i
< arraysize(kReplaceHeaders
); ++i
) {
// NOTE(review): the statement executed on a match is absent from this
// listing; given the comment on kReplaceHeaders it presumably returns so
// that the header is not copied -- confirm against the full file.
48 if (base::LowerCaseEqualsASCII(name_utf8
, kReplaceHeaders
[i
]))
// Set the field on the target response.
51 response_
->setHTTPHeaderField(name
, value
);
// Response being populated by visitHeader().
54 WebURLResponse
* response_
;
// Constructs the delegate: stores a copy/reference of |response| in
// original_response_, resets the parsing state flags and derives boundary_
// from |boundary|.
// NOTE(review): some parameters and member initializers (e.g. those for
// client_, loader_, boundary_ and stop_sending_, all used elsewhere in this
// file) are absent from this listing.
59 MultipartResponseDelegate::MultipartResponseDelegate(
60 WebURLLoaderClient
* client
,
62 const WebURLResponse
& response
,
63 const std::string
& boundary
)
66 original_response_(response
),
67 encoded_data_length_(0),
// The first chunk gets special handling in OnReceivedData().
69 first_received_data_(true),
70 processing_headers_(false),
// Passed to setIsMultipartPayload() by ParseHeaders().
72 has_sent_first_response_(false) {
73 // Some servers report a boundary prefixed with "--". See bug 5786.
74 if (base::StartsWith(boundary
, "--", base::CompareCase::SENSITIVE
)) {
// Already dashed: use the reported boundary verbatim.
75 boundary_
.assign(boundary
);
// NOTE(review): the branch keyword preceding this statement is not shown;
// presumably this is the else-branch, appending |boundary| to a boundary_
// that was initialized with the "--" prefix -- confirm.
77 boundary_
.append(boundary
);
// Buffers an incoming chunk in data_ and processes the buffer: handles the
// first chunk specially, resumes header parsing if it was pending, then for
// every boundary found forwards the preceding body bytes to
// client_->didReceiveData() and calls ParseHeaders() for the next part.
// Finally flushes buffered body bytes while holding back enough to detect a
// boundary split across chunks.
// NOTE(review): many lines of this function (returns, closing braces, some
// declarations and call arguments) are absent from this listing; the comments
// below describe only the visible statements.
81 void MultipartResponseDelegate::OnReceivedData(const char* data
,
83 int encoded_data_length
) {
84 // stop_sending_ means that we've already received the final boundary token.
85 // The server should stop sending us data at this point, but if it keeps
86 // sending, we just throw it away.
// NOTE(review): data_len is not declared on any visible line; presumably it
// is a parameter whose declaration is missing from this listing.
90 data_
.append(data
, data_len
);
// Accumulate the encoded length; it is passed to didReceiveData() and then
// reset to 0 each time buffered data is handed to the client.
91 encoded_data_length_
+= encoded_data_length
;
92 if (first_received_data_
) {
93 // Some servers don't send a boundary token before the first chunk of
94 // data. We handle this case anyway (Gecko does too).
95 first_received_data_
= false;
// Drop a line break at the very start of the buffer, if PushOverLine()
// reports one.
98 int pos
= PushOverLine(data_
, 0);
100 data_
= data_
.substr(pos
);
102 if (data_
.length() < boundary_
.length() + 2) {
103 // We don't have enough data yet to make a boundary token. Just wait
104 // until the next chunk of data arrives.
// Re-arm the first-chunk handling for the next call.
105 first_received_data_
= true;
// The buffer does not begin with the boundary: synthesize one by prepending
// boundary_ and a newline.
109 if (0 != data_
.compare(0, boundary_
.length(), boundary_
)) {
110 data_
= boundary_
+ "\n" + data_
;
113 DCHECK(!first_received_data_
);
// A previous call ended in the middle of a header block; try again now that
// more data has been appended.
116 if (processing_headers_
) {
118 int pos
= PushOverLine(data_
, 0);
120 data_
= data_
.substr(pos
);
122 if (ParseHeaders()) {
123 // Successfully parsed headers.
124 processing_headers_
= false;
126 // Get more data before trying again.
130 DCHECK(!processing_headers_
);
// Handle every boundary currently present in the buffer.
// NOTE(review): the declaration of boundary_pos is not visible here.
133 while ((boundary_pos
= FindBoundary()) != std::string::npos
) {
135 // Strip out trailing \n\r characters in the buffer preceding the
136 // boundary along the same lines as Firefox.
137 size_t data_length
= boundary_pos
;
// NOTE(review): the statements inside these two checks are absent from this
// listing; presumably each one shortens data_length by one -- confirm.
138 if (boundary_pos
> 0 && data_
[boundary_pos
- 1] == '\n') {
140 if (boundary_pos
> 1 && data_
[boundary_pos
- 2] == '\r') {
144 if (data_length
> 0) {
145 // Send the last data chunk.
// NOTE(review): one argument line of this call is not shown in this listing.
146 client_
->didReceiveData(loader_
,
148 static_cast<int>(data_length
),
149 encoded_data_length_
);
150 encoded_data_length_
= 0;
153 size_t boundary_end_pos
= boundary_pos
+ boundary_
.length();
// A '-' directly after the boundary is treated as the closing delimiter.
154 if (boundary_end_pos
< data_
.length() && '-' == data_
[boundary_end_pos
]) {
155 // This was the last boundary so we can stop processing.
156 stop_sending_
= true;
161 // We can now throw out data up through the boundary
// Also skip the line break that follows the boundary, if any.
162 int offset
= PushOverLine(data_
, boundary_end_pos
);
163 data_
= data_
.substr(boundary_end_pos
+ offset
);
165 // Ok, back to parsing headers
166 if (!ParseHeaders()) {
// Headers are incomplete; remember to resume on the next call.
167 processing_headers_
= true;
172 // At this point, we should send over any data we have, but keep enough data
173 // buffered to handle a boundary that may have been truncated.
174 if (!processing_headers_
&& data_
.length() > boundary_
.length()) {
175 // If the last character is a new line character, go ahead and just send
176 // everything we have buffered. This matches an optimization in Gecko.
// NOTE(review): the length difference is narrowed into an int here; it is
// positive because of the length check in the enclosing condition.
177 int send_length
= data_
.length() - boundary_
.length();
178 if (data_
[data_
.length() - 1] == '\n')
179 send_length
= data_
.length();
// NOTE(review): the middle argument lines of this call are not shown.
181 client_
->didReceiveData(loader_
,
184 encoded_data_length_
);
// Keep only the unsent tail in the buffer.
185 data_
= data_
.substr(send_length
);
186 encoded_data_length_
= 0;
// Called when the request has completed; flushes whatever body bytes are
// still buffered in data_ to the client.
190 void MultipartResponseDelegate::OnCompletedRequest() {
191 // If we have any pending data and we're not in a header, go ahead and send
// it. Nothing is sent after the final boundary (stop_sending_) or when
// client_ is null.
193 if (!processing_headers_
&& !data_
.empty() && !stop_sending_
&& client_
) {
// NOTE(review): one argument line of this call is not shown in this listing.
194 client_
->didReceiveData(loader_
,
196 static_cast<int>(data_
.length()),
197 encoded_data_length_
);
// The accumulated encoded length has been reported; start over.
198 encoded_data_length_
= 0;
// Inspects |data| at position |pos| for a line break: a '\r' or '\n',
// optionally followed by a '\n'.
// NOTE(review): the declaration of the second parameter (pos) and the
// statements that compute the return value are absent from this listing.
// Callers pass the result to substr() or add it to an offset, which suggests
// it is the number of line-break characters to skip (0, 1 or 2) -- confirm.
202 int MultipartResponseDelegate::PushOverLine(const std::string
& data
,
// First character of a potential line break; bounds-checked against |data|.
205 if (pos
< data
.length() && (data
[pos
] == '\r' || data
[pos
] == '\n')) {
// Second character: only '\n' is accepted, whichever character came first,
// so "\n\n" satisfies this test as well as "\r\n".
207 if (pos
+ 1 < data
.length() && data
[pos
+ 1] == '\n')
// Tries to consume one part's header block from the front of data_. When the
// block is complete, builds a WebURLResponse for the part (original response
// headers plus the part's replacement headers) and hands it to
// client_->didReceiveResponse().
// OnReceivedData() treats a true result as "headers parsed" and a false
// result as "need more data".
// NOTE(review): the return statements and the declarations of |charset| and
// |value| are absent from this listing.
213 bool MultipartResponseDelegate::ParseHeaders() {
214 int headers_end_pos
= net::HttpUtil::LocateEndOfAdditionalHeaders(
215 data_
.c_str(), data_
.size(), 0);
// NOTE(review): the statement guarded by this check is not shown; presumably
// it returns false because the end of the headers has not arrived yet.
217 if (headers_end_pos
< 0)
220 // Eat headers and prepend a status line as is required by
221 // HttpResponseHeaders.
222 std::string
headers("HTTP/1.1 200 OK\r\n");
223 headers
.append(data_
, 0, headers_end_pos
);
// Remove the consumed header bytes from the buffer.
224 data_
= data_
.substr(headers_end_pos
);
226 scoped_refptr
<net::HttpResponseHeaders
> response_headers
=
227 new net::HttpResponseHeaders(
228 net::HttpUtil::AssembleRawHeaders(headers
.c_str(), headers
.size()));
230 // Create a WebURLResponse based on the original set of headers + the
231 // replacement headers. We only replace the same few headers that gecko
232 // does. See netwerk/streamconv/converters/nsMultiMixedConv.cpp.
233 WebURLResponse
response(original_response_
.url());
// MIME type and charset come from the part's own headers.
235 std::string mime_type
;
236 response_headers
->GetMimeType(&mime_type
);
237 response
.setMIMEType(WebString::fromUTF8(mime_type
));
240 response_headers
->GetCharset(&charset
);
241 response
.setTextEncodingName(WebString::fromUTF8(charset
));
243 // Copy the response headers from the original response.
244 HeaderCopier
copier(&response
);
245 original_response_
.visitHTTPHeaderFields(&copier
);
247 // Replace original headers with multipart headers listed in kReplaceHeaders.
248 for (size_t i
= 0; i
< arraysize(kReplaceHeaders
); ++i
) {
249 std::string
name(kReplaceHeaders
[i
]);
// Add every occurrence of this header found in the part's headers.
251 void* iterator
= nullptr;
252 while (response_headers
->EnumerateHeader(&iterator
, name
, &value
)) {
253 response
.addHTTPHeaderField(WebString::fromLatin1(name
),
254 WebString::fromLatin1(value
));
257 // To avoid recording every multipart load as a separate visit in
258 // the history database, we want to keep track of whether the response
259 // is part of a multipart payload. We do want to record the first visit,
260 // so we only set isMultipartPayload to true after the first visit.
261 response
.setIsMultipartPayload(has_sent_first_response_
);
262 has_sent_first_response_
= true;
263 // Send the response!
265 client_
->didReceiveResponse(loader_
, response
);
270 // Boundaries are supposed to be preceded with --, but it looks like gecko
271 // doesn't require the dashes to exist. See nsMultiMixedConv::FindToken.
// Searches data_ for boundary_. OnReceivedData() compares the result against
// std::string::npos to detect "not found".
// Side effect: when the match is directly preceded by "--", boundary_ itself
// is rewritten to include those dashes.
// NOTE(review): the return statement and at least one statement inside the
// dash check are absent from this listing.
272 size_t MultipartResponseDelegate::FindBoundary() {
273 size_t boundary_pos
= data_
.find(boundary_
);
274 if (boundary_pos
!= std::string::npos
) {
275 // Back up over -- for backwards compat
276 // TODO(tc): Don't we only want to do this once? Gecko code doesn't seem
// Only look behind when at least two characters precede the match.
278 if (boundary_pos
>= 2) {
279 if ('-' == data_
[boundary_pos
- 1] && '-' == data_
[boundary_pos
- 2]) {
// From now on, search for the dashed form of the boundary.
281 boundary_
= "--" + boundary_
;
// Extracts the boundary parameter from |response|'s Content-Type header and
// stores it, with surrounding double quotes trimmed, in |*multipart_boundary|.
// NOTE(review): the return statements are absent from this listing;
// presumably false when no "boundary=" is present and true otherwise.
288 bool MultipartResponseDelegate::ReadMultipartBoundary(
289 const WebURLResponse
& response
,
290 std::string
* multipart_boundary
) {
291 std::string content_type
=
292 response
.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8();
// Exact, case-sensitive substring search for the parameter name.
294 size_t boundary_start_offset
= content_type
.find("boundary=");
295 if (boundary_start_offset
== std::string::npos
)
// Advance past "boundary=" to the start of the value.
298 boundary_start_offset
+= strlen("boundary=");
// The value ends at the next ';' or, failing that, at the end of the header.
300 size_t boundary_end_offset
= content_type
.find(';', boundary_start_offset
);
302 if (boundary_end_offset
== std::string::npos
)
303 boundary_end_offset
= content_type
.length();
305 size_t boundary_length
= boundary_end_offset
- boundary_start_offset
;
307 *multipart_boundary
=
308 content_type
.substr(boundary_start_offset
, boundary_length
);
309 // The byte range response can have quoted boundary strings. This is legal
310 // as per MIME specifications. Individual data fragments however don't
311 // contain quoted boundary strings.
312 base::TrimString(*multipart_boundary
, "\"", multipart_boundary
);
// Parses the byte range carried by |response| into the three out-parameters.
// The header text is read from "Content-Range", falling back to "Range" when
// that is empty, and is split as
//   <anything> <lower>-<upper>/<instance size>
// i.e. at the first space, the next '-' and the next '/'. Each piece is then
// converted with base::StringToInt64.
// NOTE(review): the statements executed on each failed check and the final
// return are absent from this listing; presumably every failure returns
// false and success returns true -- confirm.
316 bool MultipartResponseDelegate::ReadContentRanges(
317 const WebURLResponse
& response
,
318 int64
* content_range_lower_bound
,
319 int64
* content_range_upper_bound
,
320 int64
* content_range_instance_size
) {
322 std::string content_range
= response
.httpHeaderField("Content-Range").utf8();
// Fall back to the "Range" header when "Content-Range" is missing or empty.
323 if (content_range
.empty()) {
324 content_range
= response
.httpHeaderField("Range").utf8();
327 if (content_range
.empty()) {
328 DLOG(WARNING
) << "Failed to read content range from response.";
// The lower bound starts after the first space in the header value.
332 size_t byte_range_lower_bound_start_offset
= content_range
.find(" ");
333 if (byte_range_lower_bound_start_offset
== std::string::npos
) {
337 // Skip over the initial space.
338 byte_range_lower_bound_start_offset
++;
340 // Find the lower bound.
341 size_t byte_range_lower_bound_end_offset
=
342 content_range
.find("-", byte_range_lower_bound_start_offset
);
343 if (byte_range_lower_bound_end_offset
== std::string::npos
) {
347 size_t byte_range_lower_bound_characters
=
348 byte_range_lower_bound_end_offset
- byte_range_lower_bound_start_offset
;
349 std::string byte_range_lower_bound
=
350 content_range
.substr(byte_range_lower_bound_start_offset
,
351 byte_range_lower_bound_characters
);
353 // Find the upper bound.
354 size_t byte_range_upper_bound_start_offset
=
355 byte_range_lower_bound_end_offset
+ 1;
357 size_t byte_range_upper_bound_end_offset
=
358 content_range
.find("/", byte_range_upper_bound_start_offset
);
359 if (byte_range_upper_bound_end_offset
== std::string::npos
) {
363 size_t byte_range_upper_bound_characters
=
364 byte_range_upper_bound_end_offset
- byte_range_upper_bound_start_offset
;
365 std::string byte_range_upper_bound
=
366 content_range
.substr(byte_range_upper_bound_start_offset
,
367 byte_range_upper_bound_characters
);
369 // Find the instance size.
// Everything after the '/' up to the end of the string.
370 size_t byte_range_instance_size_start_offset
=
371 byte_range_upper_bound_end_offset
+ 1;
373 size_t byte_range_instance_size_end_offset
=
374 content_range
.length();
376 size_t byte_range_instance_size_characters
=
377 byte_range_instance_size_end_offset
-
378 byte_range_instance_size_start_offset
;
379 std::string byte_range_instance_size
=
380 content_range
.substr(byte_range_instance_size_start_offset
,
381 byte_range_instance_size_characters
);
// Convert the three substrings; the outputs are written by StringToInt64.
383 if (!base::StringToInt64(byte_range_lower_bound
, content_range_lower_bound
))
385 if (!base::StringToInt64(byte_range_upper_bound
, content_range_upper_bound
))
387 if (!base::StringToInt64(byte_range_instance_size
,
388 content_range_instance_size
)) {
394 } // namespace content