1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
6 // mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp
7 // The license block is:
8 /* ***** BEGIN LICENSE BLOCK *****
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
11 * The contents of this file are subject to the Mozilla Public License Version
12 * 1.1 (the "License"); you may not use this file except in compliance with
13 * the License. You may obtain a copy of the License at
14 * http://www.mozilla.org/MPL/
16 * Software distributed under the License is distributed on an "AS IS" basis,
17 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
18 * for the specific language governing rights and limitations under the
21 * The Original Code is Mozilla.
23 * The Initial Developer of the Original Code is
24 * Netscape Communications.
25 * Portions created by the Initial Developer are Copyright (C) 2001
26 * the Initial Developer. All Rights Reserved.
29 * Darin Fisher <darin@netscape.com> (original author)
31 * Alternatively, the contents of this file may be used under the terms of
32 * either the GNU General Public License Version 2 or later (the "GPL"), or
33 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
34 * in which case the provisions of the GPL or the LGPL are applicable instead
35 * of those above. If you wish to allow use of your version of this file only
36 * under the terms of either the GPL or the LGPL, and not to allow others to
37 * use your version of this file under the terms of the MPL, indicate your
38 * decision by deleting the provisions above and replace them with the notice
39 * and other provisions required by the GPL or the LGPL. If you do not delete
40 * the provisions above, a recipient may use your version of this file under
41 * the terms of any one of the MPL, the GPL or the LGPL.
43 * ***** END LICENSE BLOCK ***** */
45 #include "net/http/http_chunked_decoder.h"
47 #include "base/logging.h"
48 #include "base/strings/string_number_conversions.h"
49 #include "base/strings/string_piece.h"
50 #include "base/strings/string_util.h"
51 #include "net/base/net_errors.h"
55 // Absurdly long size to avoid imposing a constraint on chunked encoding
57 const size_t HttpChunkedDecoder::kMaxLineBufLen
= 16384;
59 HttpChunkedDecoder::HttpChunkedDecoder()
60 : chunk_remaining_(0),
61 chunk_terminator_remaining_(false),
62 reached_last_chunk_(false),
67 int HttpChunkedDecoder::FilterBuf(char* buf
, int buf_len
) {
71 if (chunk_remaining_
) {
72 int num
= std::min(chunk_remaining_
, buf_len
);
75 chunk_remaining_
-= num
;
80 // After each chunk's data there should be a CRLF
81 if (!chunk_remaining_
)
82 chunk_terminator_remaining_
= true;
84 } else if (reached_eof_
) {
85 bytes_after_eof_
+= buf_len
;
89 int bytes_consumed
= ScanForChunkRemaining(buf
, buf_len
);
90 if (bytes_consumed
< 0)
91 return bytes_consumed
; // Error
93 buf_len
-= bytes_consumed
;
95 memmove(buf
, buf
+ bytes_consumed
, buf_len
);
101 int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf
, int buf_len
) {
102 DCHECK_EQ(0, chunk_remaining_
);
103 DCHECK_GT(buf_len
, 0);
105 int bytes_consumed
= 0;
107 size_t index_of_lf
= base::StringPiece(buf
, buf_len
).find('\n');
108 if (index_of_lf
!= base::StringPiece::npos
) {
109 buf_len
= static_cast<int>(index_of_lf
);
110 if (buf_len
&& buf
[buf_len
- 1] == '\r') // Eliminate a preceding CR.
112 bytes_consumed
= static_cast<int>(index_of_lf
) + 1;
114 // Make buf point to the full line buffer to parse.
115 if (!line_buf_
.empty()) {
116 line_buf_
.append(buf
, buf_len
);
117 buf
= line_buf_
.data();
118 buf_len
= static_cast<int>(line_buf_
.size());
121 if (reached_last_chunk_
) {
123 DVLOG(1) << "ignoring http trailer";
126 } else if (chunk_terminator_remaining_
) {
128 DLOG(ERROR
) << "chunk data not terminated properly";
129 return ERR_INVALID_CHUNKED_ENCODING
;
131 chunk_terminator_remaining_
= false;
132 } else if (buf_len
) {
133 // Ignore any chunk-extensions.
134 size_t index_of_semicolon
= base::StringPiece(buf
, buf_len
).find(';');
135 if (index_of_semicolon
!= base::StringPiece::npos
)
136 buf_len
= static_cast<int>(index_of_semicolon
);
138 if (!ParseChunkSize(buf
, buf_len
, &chunk_remaining_
)) {
139 DLOG(ERROR
) << "Failed parsing HEX from: " <<
140 std::string(buf
, buf_len
);
141 return ERR_INVALID_CHUNKED_ENCODING
;
144 if (chunk_remaining_
== 0)
145 reached_last_chunk_
= true;
147 DLOG(ERROR
) << "missing chunk-size";
148 return ERR_INVALID_CHUNKED_ENCODING
;
152 // Save the partial line; wait for more data.
153 bytes_consumed
= buf_len
;
155 // Ignore a trailing CR
156 if (buf
[buf_len
- 1] == '\r')
159 if (line_buf_
.length() + buf_len
> kMaxLineBufLen
) {
160 DLOG(ERROR
) << "Chunked line length too long";
161 return ERR_INVALID_CHUNKED_ENCODING
;
164 line_buf_
.append(buf
, buf_len
);
166 return bytes_consumed
;
170 // While the HTTP 1.1 specification defines chunk-size as 1*HEX
171 // some sites rely on more lenient parsing.
172 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces
173 // (0x20) to be 7 characters long, such as "819b ".
175 // A comparison of browsers running on WindowsXP shows that
176 // they will parse the following inputs (egrep syntax):
178 // Let \X be the character class for a hex digit: [0-9a-fA-F]
182 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$
183 // Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
184 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
186 // Our strategy is to be as strict as possible, while not breaking
190 bool HttpChunkedDecoder::ParseChunkSize(const char* start
, int len
, int* out
) {
193 // Strip trailing spaces
194 while (len
&& start
[len
- 1] == ' ')
197 // Be more restrictive than HexStringToInt;
198 // don't allow inputs with leading "-", "+", "0x", "0X"
199 base::StringPiece
chunk_size(start
, len
);
200 if (chunk_size
.find_first_not_of("0123456789abcdefABCDEF")
201 != base::StringPiece::npos
) {
206 bool ok
= base::HexStringToInt(chunk_size
, &parsed_number
);
207 if (ok
&& parsed_number
>= 0) {
208 *out
= parsed_number
;