1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/child/multipart_response_delegate.h"
7 #include "base/logging.h"
8 #include "base/strings/string_number_conversions.h"
9 #include "base/strings/string_util.h"
10 #include "net/base/net_util.h"
11 #include "net/http/http_util.h"
12 #include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h"
13 #include "third_party/WebKit/public/platform/WebString.h"
14 #include "third_party/WebKit/public/platform/WebURL.h"
15 #include "third_party/WebKit/public/platform/WebURLLoaderClient.h"
17 using blink::WebHTTPHeaderVisitor
;
18 using blink::WebString
;
19 using blink::WebURLLoader
;
20 using blink::WebURLLoaderClient
;
21 using blink::WebURLResponse
;
27 // The list of response headers that we do not copy from the original
28 // response when generating a WebURLResponse for a MIME payload.
// HeaderCopier::visitHeader() compares every original header name against
// these entries with LowerCaseEqualsASCII, and ParseHeaders() looks each of
// these names up in the individual part's headers and, when a non-empty value
// is found, sets it on the per-part response.
// NOTE(review): the visible entry is lower case, presumably because
// LowerCaseEqualsASCII expects a lower-case right-hand side -- confirm.
// NOTE(review): only part of the initializer is visible in this view; check
// the full list against the complete file.
29 const char* kReplaceHeaders
[] = {
32 "content-disposition",
// Header visitor that copies header fields onto a target WebURLResponse.
// ParseHeaders() passes an instance to
// original_response_.visitHTTPHeaderFields() in order to seed the per-part
// response with the headers of the original (outer) response.
// NOTE(review): some lines of this class (access specifiers, closing braces,
// the statement controlled by the `if` in visitHeader) are not visible in this
// view; verify against the full source.
38 class HeaderCopier
: public WebHTTPHeaderVisitor
{
// |response| is the response that receives the copied header fields. Only the
// raw pointer is stored.
40 HeaderCopier(WebURLResponse
* response
)
41 : response_(response
) {
// Called with the |name| and |value| of a header field of the visited
// response.
43 virtual void visitHeader(const WebString
& name
, const WebString
& value
) {
// Convert the name to UTF-8 once so that it can be compared against the
// narrow-string entries of kReplaceHeaders.
44 const std::string
& name_utf8
= name
.utf8();
45 for (size_t i
= 0; i
< arraysize(kReplaceHeaders
); ++i
) {
// NOTE(review): presumably a match means the header must not be copied (see
// the comment on kReplaceHeaders); the statement executed on a match is not
// visible here -- confirm.
46 if (LowerCaseEqualsASCII(name_utf8
, kReplaceHeaders
[i
]))
// Copy the header field onto the target response.
49 response_
->setHTTPHeaderField(name
, value
);
// Target of the copy; set from the constructor argument.
52 WebURLResponse
* response_
;
// Constructs a delegate for one multipart response.
//   client:   later receives didReceiveResponse()/didReceiveData() calls (see
//             ParseHeaders() and OnReceivedData()).
//   response: the original (outer) response; kept in original_response_ and
//             used by ParseHeaders() as the source of the URL and of the
//             headers copied onto each part's response.
//   boundary: the boundary string for this response; stored in boundary_.
// NOTE(review): parts of the parameter list, of the initializer list and of
// the boundary handling below are not visible in this view; verify against
// the full source.
57 MultipartResponseDelegate::MultipartResponseDelegate(
58 WebURLLoaderClient
* client
,
60 const WebURLResponse
& response
,
61 const std::string
& boundary
)
64 original_response_(response
),
// No encoded bytes have been accounted for yet.
65 encoded_data_length_(0),
// The first chunk gets special treatment in OnReceivedData().
67 first_received_data_(true),
68 processing_headers_(false),
// Consumed by ParseHeaders() to flag every response after the first one as a
// multipart payload.
70 has_sent_first_response_(false) {
71 // Some servers report a boundary prefixed with "--". See bug 5786.
// The `true` argument presumably requests a case-sensitive comparison --
// confirm against StartsWithASCII's declaration.
72 if (StartsWithASCII(boundary
, "--", true)) {
// The boundary already carries the dashes: take it verbatim.
73 boundary_
.assign(boundary
);
// NOTE(review): presumably the else branch, where the boundary is appended to
// a prefix assigned on a line that is not visible here -- confirm.
75 boundary_
.append(boundary
);
// Consumes one chunk of the multipart body.
//
// The bytes are appended to the internal buffer data_ and
// |encoded_data_length| is added to the running encoded_data_length_ total.
// The buffer is then scanned for boundary tokens: payload that precedes a
// boundary is forwarded to client_->didReceiveData(), the boundary (plus the
// line break that follows it) is discarded, and ParseHeaders() is invoked for
// the part that follows. Whatever cannot be processed yet stays in data_ until
// the next call.
//
//   data:                the received bytes.
//   data_len:            number of bytes appended from |data| (its declaration
//                        is not visible in this view).
//   encoded_data_length: amount added to encoded_data_length_; the accumulated
//                        total is handed to the client with the next
//                        didReceiveData() call and then reset to 0.
//
// NOTE(review): several lines of this function (early returns, else branches,
// closing braces, the declaration of boundary_pos, some call arguments) are
// not visible in this view; verify control flow against the full source.
79 void MultipartResponseDelegate::OnReceivedData(const char* data
,
81 int encoded_data_length
) {
82 // stop_sending_ means that we've already received the final boundary token.
83 // The server should stop sending us data at this point, but if it keeps
84 // sending anyway, we just throw it away.
// Buffer the new bytes and account for their encoded size.
88 data_
.append(data
, data_len
);
89 encoded_data_length_
+= encoded_data_length
;
90 if (first_received_data_
) {
91 // Some servers don't send a boundary token before the first chunk of
92 // data. We handle this case anyway (Gecko does too).
93 first_received_data_
= false;
// Drop whatever PushOverLine() measures as a line break at the very start of
// the buffer.
96 int pos
= PushOverLine(data_
, 0);
98 data_
= data_
.substr(pos
);
100 if (data_
.length() < boundary_
.length() + 2) {
101 // We don't have enough data yet to make a boundary token. Just wait
102 // until the next chunk of data arrives.
// Re-arm the flag so that the next chunk runs this first-chunk logic again.
103 first_received_data_
= true;
// The buffer does not start with the boundary: prepend one so that the code
// below can treat the data like any other part.
107 if (0 != data_
.compare(0, boundary_
.length(), boundary_
)) {
108 data_
= boundary_
+ "\n" + data_
;
111 DCHECK(!first_received_data_
);
// Headers of the current part were still incomplete after the previous chunk
// (the flag is set further down when ParseHeaders() fails).
114 if (processing_headers_
) {
// Skip a line break at the start of the buffer before retrying.
116 int pos
= PushOverLine(data_
, 0);
118 data_
= data_
.substr(pos
);
120 if (ParseHeaders()) {
121 // Successfully parsed headers.
122 processing_headers_
= false;
124 // Get more data before trying again.
128 DCHECK(!processing_headers_
);
// Handle every boundary currently present in the buffer.
131 while ((boundary_pos
= FindBoundary()) != std::string::npos
) {
133 // Strip out trailing \n\r characters in the buffer preceding the
134 // boundary on the same lines as Firefox.
// data_length starts as the number of bytes in front of the boundary.
// NOTE(review): the statements that shrink it inside the two checks below are
// not visible in this view.
135 size_t data_length
= boundary_pos
;
136 if (boundary_pos
> 0 && data_
[boundary_pos
- 1] == '\n') {
138 if (boundary_pos
> 1 && data_
[boundary_pos
- 2] == '\r') {
142 if (data_length
> 0) {
143 // Send the last data chunk.
144 client_
->didReceiveData(loader_
,
146 static_cast<int>(data_length
),
147 encoded_data_length_
);
// The accumulated encoded length has been reported; start counting afresh.
148 encoded_data_length_
= 0;
// A '-' directly after the boundary marks the closing boundary.
151 size_t boundary_end_pos
= boundary_pos
+ boundary_
.length();
152 if (boundary_end_pos
< data_
.length() && '-' == data_
[boundary_end_pos
]) {
153 // This was the last boundary so we can stop processing.
154 stop_sending_
= true;
159 // We can now throw out data up through the boundary
// ...including the line break that follows it, as measured by PushOverLine().
160 int offset
= PushOverLine(data_
, boundary_end_pos
);
161 data_
= data_
.substr(boundary_end_pos
+ offset
);
163 // Ok, back to parsing headers
164 if (!ParseHeaders()) {
// Headers are incomplete; remember that so the next call resumes with them.
165 processing_headers_
= true;
170 // At this point, we should send over any data we have, but keep enough data
171 // buffered to handle a boundary that may have been truncated.
172 if (!processing_headers_
&& data_
.length() > boundary_
.length()) {
173 // If the last character is a new line character, go ahead and just send
174 // everything we have buffered. This matches an optimization in Gecko.
// By default the last boundary_.length() bytes are held back.
175 int send_length
= data_
.length() - boundary_
.length();
176 if (data_
[data_
.length() - 1] == '\n')
177 send_length
= data_
.length();
179 client_
->didReceiveData(loader_
,
182 encoded_data_length_
);
// Keep only the unsent tail and reset the encoded-length accumulator.
183 data_
= data_
.substr(send_length
);
184 encoded_data_length_
= 0;
// Flushes the remaining buffered payload to the client.
// Nothing is sent while headers are still being parsed, when the buffer is
// empty, after the final boundary has been seen (stop_sending_), or when
// client_ is null.
// NOTE(review): some lines of this function are not visible in this view;
// verify against the full source.
188 void MultipartResponseDelegate::OnCompletedRequest() {
189 // If we have any pending data and we're not in a header, go ahead and send
191 if (!processing_headers_
&& !data_
.empty() && !stop_sending_
&& client_
) {
// Hand over the buffer together with the encoded length accumulated since the
// last didReceiveData() call.
192 client_
->didReceiveData(loader_
,
194 static_cast<int>(data_
.length()),
195 encoded_data_length_
);
// The accumulated encoded length has now been reported.
196 encoded_data_length_
= 0;
// Inspects |data| at index |pos| for a line break.
// Callers (OnReceivedData) use the int result as the number of characters to
// skip past |pos|.
// NOTE(review): the declaration of |pos| and the statements that compute and
// return the result are not visible in this view; presumably the result is 0
// when there is no line break at |pos|, 1 for a single '\r' or '\n', and 2
// when a second '\n' follows -- confirm against the full source.
200 int MultipartResponseDelegate::PushOverLine(const std::string
& data
,
// Is there a '\r' or '\n' at |pos|? The bounds check guards against |pos|
// being at or past the end of |data|.
203 if (pos
< data
.length() && (data
[pos
] == '\r' || data
[pos
] == '\n')) {
// Is the next character, if any, a '\n'?
205 if (pos
+ 1 < data
.length() && data
[pos
+ 1] == '\n')
// Tries to parse the header section of one part from the front of data_.
//
// The buffer is scanned line by line until a blank line is found. On success
// the header bytes are removed from data_, a WebURLResponse for the part is
// built (URL and headers from original_response_, MIME type and charset from
// the part's content-type, plus the part's own values for the names in
// kReplaceHeaders) and passed to client_->didReceiveResponse().
//
// The bool result is treated by OnReceivedData() as "headers completely
// parsed"; when the buffer ends in the middle of the headers, parsing stops.
// NOTE(review): the return statements, else/break lines, closing braces and
// some declarations (e.g. |charset|) are not visible in this view; verify
// against the full source.
211 bool MultipartResponseDelegate::ParseHeaders() {
// Width of the most recently seen line terminator: 1 for "\n", 2 for "\r\n".
212 int line_feed_increment
= 1;
214 // Grab the headers being liberal about line endings.
215 size_t line_start_pos
= 0;
216 size_t line_end_pos
= data_
.find('\n');
217 while (line_end_pos
!= std::string::npos
) {
// A '\r' right before the '\n' means the line ended in CRLF.
219 if (line_end_pos
> line_start_pos
&& data_
[line_end_pos
- 1] == '\r') {
220 line_feed_increment
= 2;
223 line_feed_increment
= 1;
225 if (line_start_pos
== line_end_pos
) {
226 // A blank line, end of headers
// Advance past the terminator of the blank line so that line_end_pos marks
// the end of the header section.
227 line_end_pos
+= line_feed_increment
;
230 // Find the next header line.
231 line_start_pos
= line_end_pos
+ line_feed_increment
;
232 line_end_pos
= data_
.find('\n', line_start_pos
);
234 // Truncated in the middle of a header, stop parsing.
235 if (line_end_pos
== std::string::npos
)
// Copy the header section into its own string, prefixed with "\n".
// NOTE(review): presumably the leading "\n" lets net::GetSpecificHeader match
// the first header line like any other -- confirm.
239 std::string
headers("\n");
240 headers
.append(data_
, 0, line_end_pos
);
// The headers are consumed; keep only what follows them.
241 data_
= data_
.substr(line_end_pos
);
243 // Create a WebURLResponse based on the original set of headers + the
244 // replacement headers. We only replace the same few headers that gecko
245 // does. See netwerk/streamconv/converters/nsMultiMixedConv.cpp.
246 std::string content_type
= net::GetSpecificHeader(headers
, "content-type");
247 std::string mime_type
;
249 bool has_charset
= false;
250 net::HttpUtil::ParseContentType(content_type
, &mime_type
, &charset
,
// The part's response reuses the URL of the original response.
252 WebURLResponse
response(original_response_
.url());
253 response
.setMIMEType(WebString::fromUTF8(mime_type
));
254 response
.setTextEncodingName(WebString::fromUTF8(charset
));
// Seed the part's response with the header fields of the original response.
256 HeaderCopier
copier(&response
);
257 original_response_
.visitHTTPHeaderFields(&copier
);
// Apply the part's own values for the headers listed in kReplaceHeaders.
// Names the part does not supply (empty value) are left untouched.
259 for (size_t i
= 0; i
< arraysize(kReplaceHeaders
); ++i
) {
260 std::string
name(kReplaceHeaders
[i
]);
261 std::string value
= net::GetSpecificHeader(headers
, name
);
262 if (!value
.empty()) {
263 response
.setHTTPHeaderField(WebString::fromUTF8(name
),
264 WebString::fromUTF8(value
));
267 // To avoid recording every multipart load as a separate visit in
268 // the history database, we want to keep track of whether the response
269 // is part of a multipart payload. We do want to record the first visit,
270 // so we only set isMultipartPayload to true after the first visit.
271 response
.setIsMultipartPayload(has_sent_first_response_
);
272 has_sent_first_response_
= true;
273 // Send the response!
275 client_
->didReceiveResponse(loader_
, response
);
280 // Boundaries are supposed to be preceded with --, but it looks like gecko
281 // doesn't require the dashes to exist. See nsMultiMixedConv::FindToken.
// Searches data_ for boundary_. OnReceivedData() compares the result against
// std::string::npos to decide whether a boundary was found.
// Side effect: when the match is directly preceded by "--", boundary_ itself
// is rewritten to include those dashes.
// NOTE(review): the return statement and any adjustment of boundary_pos for
// the two dashes are not visible in this view; verify against the full source.
282 size_t MultipartResponseDelegate::FindBoundary() {
283 size_t boundary_pos
= data_
.find(boundary_
);
284 if (boundary_pos
!= std::string::npos
) {
285 // Back up over -- for backwards compat
286 // TODO(tc): Don't we only want to do this once? Gecko code doesn't seem
// The >= 2 check keeps the two look-behind reads below inside the buffer.
288 if (boundary_pos
>= 2) {
289 if ('-' == data_
[boundary_pos
- 1] && '-' == data_
[boundary_pos
- 2]) {
// From now on the dashes are part of the boundary token.
291 boundary_
= "--" + boundary_
;
// Extracts the boundary parameter from the Content-Type header of |response|
// and stores it, with double-quote characters trimmed, in
// |*multipart_boundary|.
// The search for "boundary=" uses std::string::find and is therefore
// case-sensitive.
// NOTE(review): the return statements are not visible in this view;
// presumably the result is false when no "boundary=" is present and true
// otherwise -- confirm against the full source.
298 bool MultipartResponseDelegate::ReadMultipartBoundary(
299 const WebURLResponse
& response
,
300 std::string
* multipart_boundary
) {
301 std::string content_type
=
302 response
.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8();
304 size_t boundary_start_offset
= content_type
.find("boundary=");
305 if (boundary_start_offset
== std::string::npos
)
// Move past the "boundary=" prefix to the first character of the value.
308 boundary_start_offset
+= strlen("boundary=");
// The value runs up to the next ';' or, if there is none, to the end of the
// header value.
310 size_t boundary_end_offset
= content_type
.find(';', boundary_start_offset
);
312 if (boundary_end_offset
== std::string::npos
)
313 boundary_end_offset
= content_type
.length();
315 size_t boundary_length
= boundary_end_offset
- boundary_start_offset
;
317 *multipart_boundary
=
318 content_type
.substr(boundary_start_offset
, boundary_length
);
319 // The byte range response can have quoted boundary strings. This is legal
320 // as per MIME specifications. Individual data fragments however don't
321 // contain quoted boundary strings.
322 base::TrimString(*multipart_boundary
, "\"", multipart_boundary
);
// Parses the byte range of |response| into three integers.
//
// The value is read from the "Content-Range" header, falling back to "Range"
// when that is empty. It is split at the first space, the following '-' and
// the following '/', i.e. a value shaped like "<unit> <lower>-<upper>/<size>",
// and each piece is converted with base::StringToInt64.
//
//   content_range_lower_bound:   receives <lower>.
//   content_range_upper_bound:   receives <upper>.
//   content_range_instance_size: receives <size>.
//
// NOTE(review): the return statements are not visible in this view;
// presumably the result is false when the header is missing, a separator is
// not found or a conversion fails, and true otherwise -- confirm against the
// full source.
326 bool MultipartResponseDelegate::ReadContentRanges(
327 const WebURLResponse
& response
,
328 int64
* content_range_lower_bound
,
329 int64
* content_range_upper_bound
,
330 int64
* content_range_instance_size
) {
332 std::string content_range
= response
.httpHeaderField("Content-Range").utf8();
// No Content-Range header: try the Range header instead.
333 if (content_range
.empty()) {
334 content_range
= response
.httpHeaderField("Range").utf8();
337 if (content_range
.empty()) {
338 DLOG(WARNING
) << "Failed to read content range from response.";
// The lower bound starts right after the first space in the value.
342 size_t byte_range_lower_bound_start_offset
= content_range
.find(" ");
343 if (byte_range_lower_bound_start_offset
== std::string::npos
) {
347 // Skip over the initial space.
348 byte_range_lower_bound_start_offset
++;
350 // Find the lower bound.
// It ends at the next '-'.
351 size_t byte_range_lower_bound_end_offset
=
352 content_range
.find("-", byte_range_lower_bound_start_offset
);
353 if (byte_range_lower_bound_end_offset
== std::string::npos
) {
357 size_t byte_range_lower_bound_characters
=
358 byte_range_lower_bound_end_offset
- byte_range_lower_bound_start_offset
;
359 std::string byte_range_lower_bound
=
360 content_range
.substr(byte_range_lower_bound_start_offset
,
361 byte_range_lower_bound_characters
);
363 // Find the upper bound.
// It starts right after the '-' and ends at the next '/'.
364 size_t byte_range_upper_bound_start_offset
=
365 byte_range_lower_bound_end_offset
+ 1;
367 size_t byte_range_upper_bound_end_offset
=
368 content_range
.find("/", byte_range_upper_bound_start_offset
);
369 if (byte_range_upper_bound_end_offset
== std::string::npos
) {
373 size_t byte_range_upper_bound_characters
=
374 byte_range_upper_bound_end_offset
- byte_range_upper_bound_start_offset
;
375 std::string byte_range_upper_bound
=
376 content_range
.substr(byte_range_upper_bound_start_offset
,
377 byte_range_upper_bound_characters
);
379 // Find the instance size.
// It is everything after the '/' up to the end of the value.
380 size_t byte_range_instance_size_start_offset
=
381 byte_range_upper_bound_end_offset
+ 1;
383 size_t byte_range_instance_size_end_offset
=
384 content_range
.length();
386 size_t byte_range_instance_size_characters
=
387 byte_range_instance_size_end_offset
-
388 byte_range_instance_size_start_offset
;
389 std::string byte_range_instance_size
=
390 content_range
.substr(byte_range_instance_size_start_offset
,
391 byte_range_instance_size_characters
);
// Convert the three substrings; each conversion writes directly into the
// corresponding output parameter.
393 if (!base::StringToInt64(byte_range_lower_bound
, content_range_lower_bound
))
395 if (!base::StringToInt64(byte_range_upper_bound
, content_range_upper_bound
))
397 if (!base::StringToInt64(byte_range_instance_size
,
398 content_range_instance_size
)) {
404 } // namespace content