Windows should animate when they are about to get docked at screen edges.
[chromium-blink-merge.git] / net / tools / flip_server / balsa_frame.cc
blobdead665771be1f9d7bf6888dc86e3c105eecedc8
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/tools/flip_server/balsa_frame.h"
7 #include <assert.h>
8 #if __SSE2__
9 #include <emmintrin.h>
10 #endif // __SSE2__
11 #include <strings.h>
13 #include <limits>
14 #include <string>
15 #include <utility>
16 #include <vector>
18 #include "base/logging.h"
19 #include "base/port.h"
20 #include "base/strings/string_piece.h"
21 #include "net/tools/flip_server/balsa_enums.h"
22 #include "net/tools/flip_server/balsa_headers.h"
23 #include "net/tools/flip_server/balsa_visitor_interface.h"
24 #include "net/tools/flip_server/buffer_interface.h"
25 #include "net/tools/flip_server/simple_buffer.h"
26 #include "net/tools/flip_server/split.h"
27 #include "net/tools/flip_server/string_piece_utils.h"
29 namespace net {
// Header names that influence how an HTTP message is framed; the framer has
// to recognize these itself. Each *Size constant is the length of the name
// without its terminating NUL.
static const char kContentLength[] = "content-length";
static const char kTransferEncoding[] = "transfer-encoding";
static const size_t kContentLengthSize = sizeof(kContentLength) - 1;
static const size_t kTransferEncodingSize = sizeof(kTransferEncoding) - 1;
38 BalsaFrame::BalsaFrame()
39 : last_char_was_slash_r_(false),
40 saw_non_newline_char_(false),
41 start_was_space_(true),
42 chunk_length_character_extracted_(false),
43 is_request_(true),
44 request_was_head_(false),
45 max_header_length_(16 * 1024),
46 max_request_uri_length_(2048),
47 visitor_(&do_nothing_visitor_),
48 chunk_length_remaining_(0),
49 content_length_remaining_(0),
50 last_slash_n_loc_(NULL),
51 last_recorded_slash_n_loc_(NULL),
52 last_slash_n_idx_(0),
53 term_chars_(0),
54 parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE),
55 last_error_(BalsaFrameEnums::NO_ERROR),
56 headers_(NULL) {
59 BalsaFrame::~BalsaFrame() {}
61 void BalsaFrame::Reset() {
62 last_char_was_slash_r_ = false;
63 saw_non_newline_char_ = false;
64 start_was_space_ = true;
65 chunk_length_character_extracted_ = false;
66 // is_request_ = true; // not reset between messages.
67 // request_was_head_ = false; // not reset between messages.
68 // max_header_length_ = 4096; // not reset between messages.
69 // max_request_uri_length_ = 2048; // not reset between messages.
70 // visitor_ = &do_nothing_visitor_; // not reset between messages.
71 chunk_length_remaining_ = 0;
72 content_length_remaining_ = 0;
73 last_slash_n_loc_ = NULL;
74 last_recorded_slash_n_loc_ = NULL;
75 last_slash_n_idx_ = 0;
76 term_chars_ = 0;
77 parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;
78 last_error_ = BalsaFrameEnums::NO_ERROR;
79 lines_.clear();
80 if (headers_ != NULL) {
81 headers_->Clear();
85 const char* BalsaFrameEnums::ParseStateToString(
86 BalsaFrameEnums::ParseState error_code) {
87 switch (error_code) {
88 case PARSE_ERROR:
89 return "PARSE_ERROR";
90 case READING_HEADER_AND_FIRSTLINE:
91 return "READING_HEADER_AND_FIRSTLINE";
92 case READING_CHUNK_LENGTH:
93 return "READING_CHUNK_LENGTH";
94 case READING_CHUNK_EXTENSION:
95 return "READING_CHUNK_EXTENSION";
96 case READING_CHUNK_DATA:
97 return "READING_CHUNK_DATA";
98 case READING_CHUNK_TERM:
99 return "READING_CHUNK_TERM";
100 case READING_LAST_CHUNK_TERM:
101 return "READING_LAST_CHUNK_TERM";
102 case READING_TRAILER:
103 return "READING_TRAILER";
104 case READING_UNTIL_CLOSE:
105 return "READING_UNTIL_CLOSE";
106 case READING_CONTENT:
107 return "READING_CONTENT";
108 case MESSAGE_FULLY_READ:
109 return "MESSAGE_FULLY_READ";
110 case NUM_STATES:
111 return "UNKNOWN_STATE";
113 return "UNKNOWN_STATE";
116 const char* BalsaFrameEnums::ErrorCodeToString(
117 BalsaFrameEnums::ErrorCode error_code) {
118 switch (error_code) {
119 case NO_ERROR:
120 return "NO_ERROR";
121 case NO_STATUS_LINE_IN_RESPONSE:
122 return "NO_STATUS_LINE_IN_RESPONSE";
123 case NO_REQUEST_LINE_IN_REQUEST:
124 return "NO_REQUEST_LINE_IN_REQUEST";
125 case FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION:
126 return "FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION";
127 case FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD:
128 return "FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD";
129 case FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE:
130 return "FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE";
131 case FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI:
132 return "FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI";
133 case FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE:
134 return "FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE";
135 case FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION:
136 return "FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION";
137 case FAILED_CONVERTING_STATUS_CODE_TO_INT:
138 return "FAILED_CONVERTING_STATUS_CODE_TO_INT";
139 case REQUEST_URI_TOO_LONG:
140 return "REQUEST_URI_TOO_LONG";
141 case HEADERS_TOO_LONG:
142 return "HEADERS_TOO_LONG";
143 case UNPARSABLE_CONTENT_LENGTH:
144 return "UNPARSABLE_CONTENT_LENGTH";
145 case MAYBE_BODY_BUT_NO_CONTENT_LENGTH:
146 return "MAYBE_BODY_BUT_NO_CONTENT_LENGTH";
147 case REQUIRED_BODY_BUT_NO_CONTENT_LENGTH:
148 return "REQUIRED_BODY_BUT_NO_CONTENT_LENGTH";
149 case HEADER_MISSING_COLON:
150 return "HEADER_MISSING_COLON";
151 case INVALID_CHUNK_LENGTH:
152 return "INVALID_CHUNK_LENGTH";
153 case CHUNK_LENGTH_OVERFLOW:
154 return "CHUNK_LENGTH_OVERFLOW";
155 case CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO:
156 return "CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO";
157 case CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT:
158 return "CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT";
159 case MULTIPLE_CONTENT_LENGTH_KEYS:
160 return "MULTIPLE_CONTENT_LENGTH_KEYS";
161 case MULTIPLE_TRANSFER_ENCODING_KEYS:
162 return "MULTIPLE_TRANSFER_ENCODING_KEYS";
163 case UNKNOWN_TRANSFER_ENCODING:
164 return "UNKNOWN_TRANSFER_ENCODING";
165 case INVALID_HEADER_FORMAT:
166 return "INVALID_HEADER_FORMAT";
167 case INTERNAL_LOGIC_ERROR:
168 return "INTERNAL_LOGIC_ERROR";
169 case NUM_ERROR_CODES:
170 return "UNKNOWN_ERROR";
172 return "UNKNOWN_ERROR";
175 // Summary:
176 // Parses the first line of either a request or response.
177 // Note that in the case of a detected warning, error_code will be set
178 // but the function will not return false.
179 // Exactly zero or one warning or error (but not both) may be detected
180 // by this function.
181 // Note that this function will not write the data of the first-line
182 // into the header's buffer (that should already have been done elsewhere).
// The positions of the three first-line tokens are recorded in headers as
// offsets relative to begin (whitespace_N_idx_ / non_whitespace_N_idx_).
184 // Pre-conditions:
185 // begin != end
186 // *begin should be a character which is > ' '. This implies that there
187 // is at least one non-whitespace character between [begin, end).
188 // headers is a valid pointer to a BalsaHeaders class.
189 // error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.
190 // Entire first line must exist between [begin, end)
191 // Exactly zero or one newlines -may- exist between [begin, end)
192 // [begin, end) should exist in the header's buffer.
// max_request_uri_length only matters when is_request is true: a request-URI
// longer than this many bytes fails the parse with REQUEST_URI_TOO_LONG.
194 // Side-effects:
195 // headers will be modified
196 // error_code may be modified if either a warning or error is detected
198 // Returns:
199 // True if no error (as opposed to warning) is detected.
200 // False if an error (as opposed to warning) is detected.
203 // If there is indeed non-whitespace in the line, then the following
204 // will take care of this for you:
205 // while (*begin <= ' ') ++begin;
206 // ParseHTTPFirstLine(begin, end, is_request, max_request_uri_length,
// &headers, &error_code);
208 bool ParseHTTPFirstLine(const char* begin,
209 const char* end,
210 bool is_request,
211 size_t max_request_uri_length,
212 BalsaHeaders* headers,
213 BalsaFrameEnums::ErrorCode* error_code) {
214 const char* current = begin;
215 // HTTP firstlines all have the following structure:
216 // LWS NONWS LWS NONWS LWS NONWS NOTCRLF CRLF
217 // [\t \r\n]+ [^\t ]+ [\t ]+ [^\t ]+ [\t ]+ [^\t ]+ [^\r\n]+ "\r\n"
218 // ws1 nws1 ws2 nws2 ws3 nws3 ws4
219 // | [-------) [-------) [----------------)
220 // REQ: method request_uri version
221 // RESP: version statuscode reason
223 // The first NONWS->LWS component we'll call firstline_a.
224 // The second firstline_b, and the third firstline_c.
226 // firstline_a goes from nws1 to (but not including) ws2
227 // firstline_b goes from nws2 to (but not including) ws3
228 // firstline_c goes from nws3 to (but not including) ws4
230 // In the code:
231 // ws1 == whitespace_1_idx_
232 // nws1 == non_whitespace_1_idx_
233 // ws2 == whitespace_2_idx_
234 // nws2 == non_whitespace_2_idx_
235 // ws3 == whitespace_3_idx_
236 // nws3 == non_whitespace_3_idx_
237 // ws4 == whitespace_4_idx_
239 // Kill all whitespace (including '\r\n') at the end of the line.
// The line handed in must end with '\n'; anything else is a caller bug.
240 --end;
241 if (*end != '\n') {
242 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
243 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
244 << headers->OriginalHeadersForDebugging();
245 return false;
// Walk backwards over every trailing character that is <= ' '.
247 while (begin < end && *end <= ' ') {
248 --end;
// If end still points at '\n' here, nothing but the newline was present.
// The check is repeated as a runtime test so that builds without DCHECKs
// also bail out.
250 DCHECK(*end != '\n');
251 if (*end == '\n') {
252 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
253 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
254 << headers->OriginalHeadersForDebugging();
255 return false;
// Make end exclusive again: one past the last non-whitespace character.
257 ++end;
259 // The two following statements should not be possible.
260 if (end == begin) {
261 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
262 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
263 << headers->OriginalHeadersForDebugging();
264 return false;
267 // whitespace_1_idx_
268 headers->whitespace_1_idx_ = current - begin;
269 // This loop is commented out as it is never used in current code. This is
270 // true only because we don't begin parsing the headers at all until we've
271 // encountered a non whitespace character at the beginning of the stream, at
272 // which point we begin our demarcation of header-start. If we did -not- do
273 // this (for instance, only looked for [\r\n] instead of (< ' ')), this loop
274 // would be necessary for the proper functioning of this parsing.
275 // This is left here as this function may (in the future) be refactored out
276 // of the BalsaFrame class so that it may be shared between code in
277 // BalsaFrame and BalsaHeaders (where it would be used in some variant of the
278 // set_first_line() function (at which point it would be necessary).
279 #if 0
280 while (*current <= ' ') {
281 ++current;
283 #endif
284 // non_whitespace_1_idx_
285 headers->non_whitespace_1_idx_ = current - begin;
// Scan firstline_a (request method / response version).
286 do {
287 // The first time through, we're guaranteed that the current character
288 // won't be a whitespace (else the loop above wouldn't have terminated).
289 // That implies that we're guaranteed to get at least one non-whitespace
290 // character if we get into this loop at all.
291 ++current;
292 if (current == end) {
// The line holds a single token: collapse all remaining indices onto end.
293 headers->whitespace_2_idx_ = current - begin;
294 headers->non_whitespace_2_idx_ = current - begin;
295 headers->whitespace_3_idx_ = current - begin;
296 headers->non_whitespace_3_idx_ = current - begin;
297 headers->whitespace_4_idx_ = current - begin;
298 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request
299 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
// (adding is_request relies on the request code directly following the
// response code in the enum)
300 *error_code =
301 static_cast<BalsaFrameEnums::ErrorCode>(
302 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
303 is_request);
304 if (!is_request) { // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION
305 return false;
// For a request this is only a warning; continue with the shared tail.
307 goto output_exhausted;
309 } while (*current > ' ');
310 // whitespace_2_idx_
311 headers->whitespace_2_idx_ = current - begin;
312 do {
313 ++current;
314 // Note that due to the loop which consumes all of the whitespace
315 // at the end of the line, current can never == end while in this function.
316 } while (*current <= ' ');
317 // non_whitespace_2_idx_
318 headers->non_whitespace_2_idx_ = current - begin;
// Scan firstline_b (request-URI / response status code).
319 do {
320 ++current;
321 if (current == end) {
// Only two tokens present: the third one is recorded as empty.
322 headers->whitespace_3_idx_ = current - begin;
323 headers->non_whitespace_3_idx_ = current - begin;
324 headers->whitespace_4_idx_ = current - begin;
325 // FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI for request
326 // FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE for response
327 *error_code =
328 static_cast<BalsaFrameEnums::ErrorCode>(
329 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE
330 + is_request);
331 goto output_exhausted;
333 } while (*current > ' ');
334 // whitespace_3_idx_
335 headers->whitespace_3_idx_ = current - begin;
336 do {
337 ++current;
338 // Note that due to the loop which consumes all of the whitespace
339 // at the end of the line, current can never == end while in this function.
340 } while (*current <= ' ');
341 // non_whitespace_3_idx_
342 headers->non_whitespace_3_idx_ = current - begin;
// firstline_c runs to the (whitespace-trimmed) end of the line.
343 headers->whitespace_4_idx_ = end - begin;
345 output_exhausted:
346 // Note that we don't fail the parse immediately when parsing of the
347 // firstline fails. Depending on the protocol type, we may want to accept
348 // a firstline with only one or two elements, e.g., for HTTP/0.9:
349 // GET\r\n
350 // or
351 // GET /\r\n
352 // should be parsed without issue (though the visitor should know that
353 // parsing the entire line was not exactly as it should be).
355 // Eventually, these errors may be removed altogether, as the visitor can
356 // detect them on its own by examining the size of the various fields.
357 // headers->set_first_line(non_whitespace_1_idx_, current);
359 if (is_request) {
// The request-URI is firstline_b: [non_whitespace_2_idx_, whitespace_3_idx_).
360 if ((headers->whitespace_3_idx_ - headers->non_whitespace_2_idx_) >
361 max_request_uri_length) {
362 // For requests, we need at least the method. We could assume that a
363 // blank URI means "/". If version isn't stated, it should be assumed
364 // to be HTTP/0.9 by the visitor.
365 *error_code = BalsaFrameEnums::REQUEST_URI_TOO_LONG;
366 return false;
368 } else {
// Responses: firstline_b is the status code; convert it to a number.
369 headers->parsed_response_code_ = 0;
371 const char* parsed_response_code_current =
372 begin + headers->non_whitespace_2_idx_;
373 const char* parsed_response_code_end = begin + headers->whitespace_3_idx_;
374 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
376 // Convert a string of [0-9]* into an int.
377 // Note that this allows for the conversion of response codes which
378 // are outside the bounds of normal HTTP response codes (no checking
379 // is done to ensure that these are valid-- they're merely parsed)!
380 while (parsed_response_code_current < parsed_response_code_end) {
381 if (*parsed_response_code_current < '0' ||
382 *parsed_response_code_current > '9') {
383 *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
384 return false;
386 size_t status_code_x_10 = headers->parsed_response_code_ * 10;
387 uint8 c = *parsed_response_code_current - '0';
// Reject the digit if either the multiplication by 10 or the addition of
// the digit would exceed the size_t range.
388 if ((headers->parsed_response_code_ > kMaxDiv10) ||
389 (std::numeric_limits<size_t>::max() - status_code_x_10) < c) {
390 // overflow.
391 *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
392 return false;
394 headers->parsed_response_code_ = status_code_x_10 + c;
395 ++parsed_response_code_current;
399 return true;
402 // begin - beginning of the firstline
403 // end - end of the firstline
405 // A precondition for this function is that there is non-whitespace between
406 // [begin, end). If this precondition is not met, the function will not perform
407 // as expected (and bad things may happen, and it will eat your first, second,
408 // and third unborn children!).
410 // Another precondition for this function is that [begin, end) includes
411 // at most one newline, which must be at the end of the line.
412 void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {
413 BalsaFrameEnums::ErrorCode previous_error = last_error_;
414 if (!ParseHTTPFirstLine(begin,
415 end,
416 is_request_,
417 max_request_uri_length_,
418 headers_,
419 &last_error_)) {
420 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
421 visitor_->HandleHeaderError(this);
422 return;
424 if (previous_error != last_error_) {
425 visitor_->HandleHeaderWarning(this);
428 if (is_request_) {
429 int version_length =
430 headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_;
431 visitor_->ProcessRequestFirstLine(
432 begin + headers_->non_whitespace_1_idx_,
433 headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,
434 begin + headers_->non_whitespace_1_idx_,
435 headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,
436 begin + headers_->non_whitespace_2_idx_,
437 headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,
438 begin + headers_->non_whitespace_3_idx_,
439 version_length);
440 if (version_length == 0)
441 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
442 } else {
443 visitor_->ProcessResponseFirstLine(
444 begin + headers_->non_whitespace_1_idx_,
445 headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,
446 begin + headers_->non_whitespace_1_idx_,
447 headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,
448 begin + headers_->non_whitespace_2_idx_,
449 headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,
450 begin + headers_->non_whitespace_3_idx_,
451 headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);
455 // 'stream_begin' points to the first character of the headers buffer.
456 // 'line_begin' points to the first character of the line.
457 // 'current' points to a char which is ':'.
458 // 'line_end' points to the position of '\n' + 1.
459 // 'line_begin' points to the position of first character of line.
460 void BalsaFrame::CleanUpKeyValueWhitespace(
461 const char* stream_begin,
462 const char* line_begin,
463 const char* current,
464 const char* line_end,
465 HeaderLineDescription* current_header_line) {
466 const char* colon_loc = current;
467 DCHECK_LT(colon_loc, line_end);
468 DCHECK_EQ(':', *colon_loc);
469 DCHECK_EQ(':', *current);
470 DCHECK_GE(' ', *line_end)
471 << "\"" << std::string(line_begin, line_end) << "\"";
473 // TODO(fenix): Investigate whether or not the bounds tests in the
474 // while loops here are redundant, and if so, remove them.
475 --current;
476 while (current > line_begin && *current <= ' ') --current;
477 current += (current != colon_loc);
478 current_header_line->key_end_idx = current - stream_begin;
480 current = colon_loc;
481 DCHECK_EQ(':', *current);
482 ++current;
483 while (current < line_end && *current <= ' ') ++current;
484 current_header_line->value_begin_idx = current - stream_begin;
486 DCHECK_GE(current_header_line->key_end_idx,
487 current_header_line->first_char_idx);
488 DCHECK_GE(current_header_line->value_begin_idx,
489 current_header_line->key_end_idx);
490 DCHECK_GE(current_header_line->last_char_idx,
491 current_header_line->value_begin_idx);
// Walks the recorded lines between the first line and the final blank line,
// folds continuation lines (lines starting with a character <= ' ') into the
// header line they extend, appends one HeaderLineDescription per resulting
// header line to headers_->header_lines_, and locates the ':' that separates
// key from value. A line without a colon sets last_error_ to
// HEADER_MISSING_COLON and is reported through HandleHeaderWarning().
// Requires lines_ to hold at least two entries (lines_[1] is read up front).
494 inline void BalsaFrame::FindColonsAndParseIntoKeyValue() {
495 DCHECK(!lines_.empty());
496 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
497 // The last line is always just a newline (and is uninteresting).
498 const Lines::size_type lines_size_m1 = lines_.size() - 1;
499 #if __SSE2__
500 const __v16qi colons = { ':', ':', ':', ':', ':', ':', ':', ':',
501 ':', ':', ':', ':', ':', ':', ':', ':'};
// Last position from which a 16-byte load still ends before the stream end.
502 const char* header_lines_end_m16 = headers_->OriginalHeaderStreamEnd() - 16;
503 #endif // __SSE2__
// 'current' is the colon-search cursor. It is shared across iterations of
// the loop below, so a colon found ahead of the current line is remembered.
504 const char* current = stream_begin + lines_[1].first;
505 // This code is a bit more subtle than it may appear at first glance.
506 // This code looks for a colon in the current line... but it also looks
507 // beyond the current line. If there is no colon in the current line, then
508 // for each subsequent line (until the colon which -has- been found is
509 // associated with a line), no searching for a colon will be performed. In
510 // this way, we minimize the amount of bytes we have scanned for a colon.
// Note: i is advanced by the continuation loop, not by the for-statement.
511 for (Lines::size_type i = 1; i < lines_size_m1;) {
512 const char* line_begin = stream_begin + lines_[i].first;
514 // Here we handle possible continuations. Note that we do not replace
515 // the '\n' in the line before a continuation (at least, as of now),
516 // which implies that any code which looks for a value must deal with
517 // "\r\n", etc -within- the line (and not just at the end of it).
518 for (++i; i < lines_size_m1; ++i) {
519 const char c = *(stream_begin + lines_[i].first);
520 if (c > ' ') {
521 // Not a continuation, so stop. Note that if the 'original' i = 1,
522 // and the next line is not a continuation, we'll end up with i = 2
523 // when we break. This handles the incrementing of i for the outer
524 // loop.
525 break;
// The header line ends where its last (possibly continuation) line ends.
528 const char* line_end = stream_begin + lines_[i - 1].second;
529 DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);
531 // We cleanup the whitespace at the end of the line before doing anything
532 // else of interest as it allows us to do nothing when irregularly formatted
533 // headers are parsed (e.g. those with only keys, only values, or no colon).
535 // We're guaranteed to have *line_end > ' ' while line_end >= line_begin.
536 --line_end;
537 DCHECK_EQ('\n', *line_end)
538 << "\"" << std::string(line_begin, line_end) << "\"";
539 while (*line_end <= ' ' && line_end > line_begin) {
540 --line_end;
// Make line_end exclusive: one past the last non-whitespace character.
542 ++line_end;
543 DCHECK_GE(' ', *line_end);
544 DCHECK_LT(line_begin, line_end);
546 // We use '0' for the block idx, because we're always writing to the first
547 // block from the framer (we do this because the framer requires that the
548 // entire header sequence be in a contiguous buffer).
// key_end_idx and value_begin_idx start out equal to the line end; they are
// corrected below once a colon has been found.
549 headers_->header_lines_.push_back(
550 HeaderLineDescription(line_begin - stream_begin,
551 line_end - stream_begin,
552 line_end - stream_begin,
553 line_end - stream_begin,
554 0));
555 if (current >= line_end) {
// The cursor is already at or past this line's end, so the line cannot
// contain a colon.
556 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON;
557 visitor_->HandleHeaderWarning(this);
558 // Then the next colon will not be found within this header line-- time
559 // to try again with another header-line.
560 continue;
561 } else if (current < line_begin) {
562 // When this condition is true, the last detected colon was part of a
563 // previous line. We reset to the beginning of the line as we don't care
564 // about the presence of any colon before the beginning of the current
565 // line.
566 current = line_begin;
568 #if __SSE2__
// Vectorized search: compare 16 bytes at a time against ':'.
569 while (current < header_lines_end_m16) {
570 __m128i header_bytes =
571 _mm_loadu_si128(reinterpret_cast<const __m128i *>(current));
572 __m128i colon_cmp =
573 _mm_cmpeq_epi8(header_bytes, reinterpret_cast<__m128i>(colons));
// One mask bit per byte; bit k is set when byte k equals ':'.
574 int colon_msk = _mm_movemask_epi8(colon_cmp);
575 if (colon_msk == 0) {
576 current += 16;
577 continue;
// ffs() is 1-based, so this lands on the first ':' in the 16-byte window.
579 current += (ffs(colon_msk) - 1);
580 if (current > line_end) {
// The colon belongs to a later line; keep the cursor there and fall through
// to the missing-colon handling for this line.
581 break;
583 goto found_colon;
585 #endif // __SSE2__
// Scalar search: covers builds without SSE2 and the tail of the buffer.
586 for (; current < line_end; ++current) {
587 if (*current != ':') {
588 continue;
590 goto found_colon;
592 // If we've gotten to here, then there was no colon
593 // in the line. The arguments we passed into the construction
594 // for the HeaderLineDescription object should be OK-- it assumes
595 // that the entire content is 'key' by default (which is true, as
596 // there was no colon, there can be no value). Note that this is a
597 // construct which is technically not allowed by the spec.
598 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON;
599 visitor_->HandleHeaderWarning(this);
600 continue;
601 found_colon:
602 DCHECK_EQ(*current, ':');
603 DCHECK_LE(current - stream_begin, line_end - stream_begin);
604 DCHECK_LE(stream_begin - stream_begin, current - stream_begin);
606 HeaderLineDescription& current_header_line = headers_->header_lines_.back();
// Provisional indices at the colon; CleanUpKeyValueWhitespace() below
// overwrites both with the whitespace-trimmed positions.
607 current_header_line.key_end_idx = current - stream_begin;
608 current_header_line.value_begin_idx = current_header_line.key_end_idx;
609 if (current < line_end) {
610 ++current_header_line.key_end_idx;
612 CleanUpKeyValueWhitespace(stream_begin,
613 line_begin,
614 current,
615 line_end,
616 &current_header_line);
621 void BalsaFrame::ProcessContentLengthLine(
622 HeaderLines::size_type line_idx,
623 BalsaHeadersEnums::ContentLengthStatus* status,
624 size_t* length) {
625 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
626 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
627 const char* line_end = stream_begin + header_line.last_char_idx;
628 const char* value_begin = (stream_begin + header_line.value_begin_idx);
630 if (value_begin >= line_end) {
631 // There is no non-whitespace value data.
632 #if DEBUGFRAMER
633 LOG(INFO) << "invalid content-length -- no non-whitespace value data";
634 #endif
635 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
636 return;
639 *length = 0;
640 while (value_begin < line_end) {
641 if (*value_begin < '0' || *value_begin > '9') {
642 // bad! content-length found, and couldn't parse all of it!
643 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
644 #if DEBUGFRAMER
645 LOG(INFO) << "invalid content-length - non numeric character detected";
646 #endif // DEBUGFRAMER
647 return;
649 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
650 size_t length_x_10 = *length * 10;
651 const unsigned char c = *value_begin - '0';
652 if (*length > kMaxDiv10 ||
653 (std::numeric_limits<size_t>::max() - length_x_10) < c) {
654 *status = BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;
655 #if DEBUGFRAMER
656 LOG(INFO) << "content-length overflow";
657 #endif // DEBUGFRAMER
658 return;
660 *length = length_x_10 + c;
661 ++value_begin;
663 #if DEBUGFRAMER
664 LOG(INFO) << "content_length parsed: " << *length;
665 #endif // DEBUGFRAMER
666 *status = BalsaHeadersEnums::VALID_CONTENT_LENGTH;
669 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {
670 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
671 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
672 const char* line_end = stream_begin + header_line.last_char_idx;
673 const char* value_begin = stream_begin + header_line.value_begin_idx;
674 size_t value_length = line_end - value_begin;
676 if ((value_length == 7) &&
677 !strncasecmp(value_begin, "chunked", 7)) {
678 headers_->transfer_encoding_is_chunked_ = true;
679 } else if ((value_length == 8) &&
680 !strncasecmp(value_begin, "identity", 8)) {
681 headers_->transfer_encoding_is_chunked_ = false;
682 } else {
683 last_error_ = BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING;
684 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
685 visitor_->HandleHeaderError(this);
686 return;
690 namespace {
691 bool SplitStringPiece(base::StringPiece original, char delim,
692 base::StringPiece* before, base::StringPiece* after) {
693 const char* p = original.data();
694 const char* end = p + original.size();
696 while (p != end) {
697 if (*p == delim) {
698 ++p;
699 } else {
700 const char* start = p;
701 while (++p != end && *p != delim) {
702 // Skip to the next occurence of the delimiter.
704 *before = base::StringPiece(start, p - start);
705 if (p != end)
706 *after = base::StringPiece(p + 1, end - (p + 1));
707 else
708 *after = base::StringPiece("");
709 StringPieceUtils::RemoveWhitespaceContext(before);
710 StringPieceUtils::RemoveWhitespaceContext(after);
711 return true;
715 *before = original;
716 *after = "";
717 return false;
720 // TODO(phython): Fix this function to properly deal with quoted values.
721 // E.g. ";;foo", "\";;\"", or \"aa;
722 // The last example, the semi-colon is a separator between extensions.
723 void ProcessChunkExtensionsManual(base::StringPiece all_extensions,
724 BalsaHeaders* extensions) {
725 base::StringPiece extension;
726 base::StringPiece remaining;
727 StringPieceUtils::RemoveWhitespaceContext(&all_extensions);
728 SplitStringPiece(all_extensions, ';', &extension, &remaining);
729 while (!extension.empty()) {
730 base::StringPiece key;
731 base::StringPiece value;
732 SplitStringPiece(extension, '=', &key, &value);
733 if (!value.empty()) {
734 // Strip quotation marks if they exist.
735 if (!value.empty() && value[0] == '"')
736 value.remove_prefix(1);
737 if (!value.empty() && value[value.length() - 1] == '"')
738 value.remove_suffix(1);
741 extensions->AppendHeader(key, value);
743 StringPieceUtils::RemoveWhitespaceContext(&remaining);
744 SplitStringPiece(remaining, ';', &extension, &remaining);
748 // TODO(phython): Fix this function to properly deal with quoted values.
749 // E.g. ";;foo", "\";;\"", or \"aa;
750 // The last example, the semi-colon is a separator between extensions.
751 void ProcessChunkExtensionsGoogle3(const char* input, size_t size,
752 BalsaHeaders* extensions) {
753 std::vector<base::StringPiece> key_values;
754 SplitStringPieceToVector(base::StringPiece(input, size), ";",
755 &key_values, true);
756 for (unsigned int i = 0; i < key_values.size(); ++i) {
757 base::StringPiece key = key_values[i].substr(0, key_values[i].find('='));
758 base::StringPiece value;
759 if (key.length() < key_values[i].length()) {
760 value = key_values[i].substr(key.length() + 1);
761 // Remove any leading and trailing whitespace.
762 StringPieceUtils::RemoveWhitespaceContext(&value);
764 // Strip quotation marks if they exist.
765 if (!value.empty() && value[0] == '"')
766 value.remove_prefix(1);
767 if (!value.empty() && value[value.length() - 1] == '"')
768 value.remove_suffix(1);
771 // Strip the key whitespace after checking that there is a value.
772 StringPieceUtils::RemoveWhitespaceContext(&key);
773 extensions->AppendHeader(key, value);
777 } // anonymous namespace
779 void BalsaFrame::ProcessChunkExtensions(const char* input, size_t size,
780 BalsaHeaders* extensions) {
781 #if 0
782 ProcessChunkExtensionsGoogle3(input, size, extensions);
783 #else
784 ProcessChunkExtensionsManual(base::StringPiece(input, size), extensions);
785 #endif
788 void BalsaFrame::ProcessHeaderLines() {
789 HeaderLines::size_type content_length_idx = 0;
790 HeaderLines::size_type transfer_encoding_idx = 0;
792 DCHECK(!lines_.empty());
793 #if DEBUGFRAMER
794 LOG(INFO) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n";
795 #endif // DEBUGFRAMER
797 // There is no need to attempt to process headers if no header lines exist.
798 // There are at least two lines in the message which are not header lines.
799 // These two non-header lines are the first line of the message, and the
800 // last line of the message (which is an empty line).
801 // Thus, we test to see if we have more than two lines total before attempting
802 // to parse any header lines.
803 if (lines_.size() > 2) {
804 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
806 // Then, for the rest of the header data, we parse these into key-value
807 // pairs.
808 FindColonsAndParseIntoKeyValue();
809 // At this point, we've parsed all of the headers. Time to look for those
810 // headers which we require for framing.
811 const HeaderLines::size_type
812 header_lines_size = headers_->header_lines_.size();
813 for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) {
814 const HeaderLineDescription& current_header_line =
815 headers_->header_lines_[i];
816 const char* key_begin =
817 (stream_begin + current_header_line.first_char_idx);
818 const char* key_end = (stream_begin + current_header_line.key_end_idx);
819 const size_t key_len = key_end - key_begin;
820 const char c = *key_begin;
821 #if DEBUGFRAMER
822 LOG(INFO) << "[" << i << "]: " << std::string(key_begin, key_len)
823 << " c: '" << c << "' key_len: " << key_len;
824 #endif // DEBUGFRAMER
825 // If a header begins with either lowercase or uppercase 'c' or 't', then
826 // the header may be one of content-length, connection, content-encoding
827 // or transfer-encoding. These headers are special, as they change the way
828 // that the message is framed, and so the framer is required to search
829 // for them.
832 if (c == 'c' || c == 'C') {
833 if ((key_len == kContentLengthSize) &&
834 0 == strncasecmp(key_begin, kContentLength, kContentLengthSize)) {
835 BalsaHeadersEnums::ContentLengthStatus content_length_status =
836 BalsaHeadersEnums::NO_CONTENT_LENGTH;
837 size_t length = 0;
838 ProcessContentLengthLine(i, &content_length_status, &length);
839 if (content_length_idx != 0) { // then we've already seen one!
840 if ((headers_->content_length_status_ != content_length_status) ||
841 ((headers_->content_length_status_ ==
842 BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&
843 length != headers_->content_length_)) {
844 last_error_ = BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS;
845 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
846 visitor_->HandleHeaderError(this);
847 return;
849 continue;
850 } else {
851 content_length_idx = i + 1;
852 headers_->content_length_status_ = content_length_status;
853 headers_->content_length_ = length;
854 content_length_remaining_ = length;
858 } else if (c == 't' || c == 'T') {
859 if ((key_len == kTransferEncodingSize) &&
860 0 == strncasecmp(key_begin, kTransferEncoding,
861 kTransferEncodingSize)) {
862 if (transfer_encoding_idx != 0) {
863 last_error_ = BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS;
864 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
865 visitor_->HandleHeaderError(this);
866 return;
868 transfer_encoding_idx = i + 1;
870 } else if (i == 0 && (key_len == 0 || c == ' ')) {
871 last_error_ = BalsaFrameEnums::INVALID_HEADER_FORMAT;
872 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
873 visitor_->HandleHeaderError(this);
874 return;
877 if (headers_->transfer_encoding_is_chunked_) {
878 headers_->content_length_ = 0;
879 headers_->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;
880 content_length_remaining_ = 0;
882 if (transfer_encoding_idx != 0) {
883 ProcessTransferEncodingLine(transfer_encoding_idx - 1);
888 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {
889 // For responses, can't have a body if the request was a HEAD, or if it is
890 // one of these response-codes. rfc2616 section 4.3
891 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
892 if (is_request_ ||
893 !(request_was_head_ ||
894 (headers_->parsed_response_code_ >= 100 &&
895 headers_->parsed_response_code_ < 200) ||
896 (headers_->parsed_response_code_ == 204) ||
897 (headers_->parsed_response_code_ == 304))) {
898 // Then we can have a body.
899 if (headers_->transfer_encoding_is_chunked_) {
900 // Note that
901 // if ( Transfer-Encoding: chunked && Content-length: )
902 // then Transfer-Encoding: chunked trumps.
903 // This is as specified in the spec.
904 // rfc2616 section 4.4.3
905 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
906 } else {
907 // Errors parsing content-length definitely can cause
908 // protocol errors/warnings
909 switch (headers_->content_length_status_) {
910 // If we have a content-length, and it is parsed
911 // properly, there are two options.
912 // 1) zero content, in which case the message is done, and
913 // 2) nonzero content, in which case we have to
914 // consume the body.
915 case BalsaHeadersEnums::VALID_CONTENT_LENGTH:
916 if (headers_->content_length_ == 0) {
917 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
918 } else {
919 parse_state_ = BalsaFrameEnums::READING_CONTENT;
921 break;
922 case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:
923 case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:
924 // If there were characters left-over after parsing the
925 // content length, we should flag an error and stop.
926 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
927 last_error_ = BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH;
928 visitor_->HandleHeaderError(this);
929 break;
930 // We can have: no transfer-encoding, no content length, and no
931 // connection: close...
932 // Unfortunately, this case doesn't seem to be covered in the spec.
933 // We'll assume that the safest thing to do here is what the google
934 // binaries before 2008 already do, which is to assume that
935 // everything until the connection is closed is body.
936 case BalsaHeadersEnums::NO_CONTENT_LENGTH:
937 if (is_request_) {
938 base::StringPiece method = headers_->request_method();
939 // POSTs and PUTs should have a detectable body length. If they
940 // do not we consider it an error.
941 if ((method.size() == 4 &&
942 strncmp(method.data(), "POST", 4) == 0) ||
943 (method.size() == 3 &&
944 strncmp(method.data(), "PUT", 3) == 0)) {
945 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
946 last_error_ =
947 BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH;
948 visitor_->HandleHeaderError(this);
949 break;
951 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
952 } else {
953 parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;
954 last_error_ = BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH;
955 visitor_->HandleHeaderWarning(this);
957 break;
958 // The COV_NF_... statements here provide hints to the apparatus
959 // which computes coverage reports/ratios that this code is never
960 // intended to be executed, and should technically be impossible.
961 // COV_NF_START
962 default:
963 LOG(FATAL) << "Saw a content_length_status: "
964 << headers_->content_length_status_ << " which is unknown.";
965 // COV_NF_END
// Consumes header bytes from [message_start, message_start + message_length).
//
// Leading '\r' / '\n' bytes that precede the first line are skipped. After
// that, every '\n' found is recorded in lines_ as the pair
// (last_slash_n_idx_, index one past this '\n'), and the first completed line
// is handed to ProcessFirstLine(). When a '\n' follows the previous '\n'
// either directly or with only a '\r' in between, the header block is
// complete: the bytes are flushed into headers_, the framing-relevant headers
// are processed, the next parse state is assigned and the visitor is
// notified.
//
// Returns the number of input bytes consumed
// (message_current - original_message_start).
971 size_t BalsaFrame::ProcessHeaders(const char* message_start,
972 size_t message_length) {
973 const char* const original_message_start = message_start;
974 const char* const message_end = message_start + message_length;
975 const char* message_current = message_start;
976 const char* checkpoint = message_start;
// Nothing to scan. Leaving here also skips the *(message_end - 1) read that
// follows the main loop.
978 if (message_length == 0) {
979 goto bottom;
982 while (message_current < message_end) {
983 size_t base_idx = headers_->GetReadableBytesFromHeaderStream();
985 // Yes, we could use strchr (assuming null termination), or
986 // memchr, but as it turns out that is slower than this tight loop
987 // for the input that we see.
988 if (!saw_non_newline_char_) {
989 do {
990 const char c = *message_current;
991 if (c != '\r' && c != '\n') {
// NOTE(review): c is a plain char, so where char is signed, bytes >= 0x80
// also satisfy c <= ' ' and are rejected here -- confirm this is intended.
992 if (c <= ' ') {
993 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
994 last_error_ = BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST;
995 visitor_->HandleHeaderError(this);
996 goto bottom;
997 } else {
// First byte of the first line: the skipped newlines before it are dropped
// from what gets written to headers_ by restarting message_start and
// checkpoint here.
998 saw_non_newline_char_ = true;
999 checkpoint = message_start = message_current;
1000 goto read_real_message;
1003 ++message_current;
1004 } while (message_current < message_end);
1005 goto bottom; // this is necessary to skip 'last_char_was_slash_r' checks
1006 } else {
1007 read_real_message:
1008 // Note that SSE2 can be enabled on certain piii platforms.
1009 #if __SSE2__
1011 const char* const message_end_m16 = message_end - 16;
1012 __v16qi newlines = { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
1013 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' };
1014 while (message_current < message_end_m16) {
1015 // What this does (using compiler intrinsics):
1017 // Load 16 '\n's into an xmm register
1018 // Load 16 bytes of current message into an xmm register
1019 // Do byte-wise equals on those two xmm registers
1020 // Take the first bit of each byte, and put that into the first
1021 // 16 bits of a mask
1022 // If the mask is zero, no '\n' found. increment by 16 and try again
1023 // Else scan forward to find the first set bit.
1024 // Increment current by the index of the first set bit
1025 // (ffs returns index of first set bit + 1)
1026 __m128i msg_bytes =
1027 _mm_loadu_si128(const_cast<__m128i *>(
1028 reinterpret_cast<const __m128i *>(message_current)));
1029 __m128i newline_cmp =
1030 _mm_cmpeq_epi8(msg_bytes, reinterpret_cast<__m128i>(newlines));
1031 int newline_msk = _mm_movemask_epi8(newline_cmp);
1032 if (newline_msk == 0) {
1033 message_current += 16;
1034 continue;
1036 message_current += (ffs(newline_msk) - 1);
// message_current now points at a '\n'. message_current_idx is the offset,
// within the header stream, of the byte just past that '\n'.
1037 const size_t relative_idx = message_current - message_start;
1038 const size_t message_current_idx = 1 + base_idx + relative_idx;
1039 lines_.push_back(std::make_pair(last_slash_n_idx_,
1040 message_current_idx));
// The first line is written to headers_ and parsed immediately, since
// ProcessFirstLine() may already finish the message or fail.
1041 if (lines_.size() == 1) {
1042 headers_->WriteFromFramer(checkpoint,
1043 1 + message_current - checkpoint);
1044 checkpoint = message_current + 1;
1045 const char* begin = headers_->OriginalHeaderStreamBegin();
1046 #if DEBUGFRAMER
1047 LOG(INFO) << "First line " << std::string(begin, lines_[0].second);
1048 LOG(INFO) << "is_request_: " << is_request_;
1049 #endif
1050 ProcessFirstLine(begin, begin + lines_[0].second);
1051 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)
1052 goto process_lines;
1053 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)
1054 goto bottom;
1056 const size_t chars_since_last_slash_n = (message_current_idx -
1057 last_slash_n_idx_);
1058 last_slash_n_idx_ = message_current_idx;
1059 if (chars_since_last_slash_n > 2) {
1060 // We have a slash-n, but the last slash n was
1061 // more than 2 characters away from this. Thus, we know
1062 // that this cannot be an end-of-header.
1063 ++message_current;
1064 continue;
// At most two bytes since the previous '\n': this is the end of the headers
// if the line is just "\n", or if the one byte in between is a '\r' (seen
// either in this buffer or at the end of the previous call).
1066 if ((chars_since_last_slash_n == 1) ||
1067 (((message_current > message_start) &&
1068 (*(message_current - 1) == '\r')) ||
1069 (last_char_was_slash_r_))) {
1070 goto process_lines;
1072 ++message_current;
// Byte-at-a-time scan. It performs the same steps as the 16-byte loop above
// and handles whatever that loop left unscanned (or all of the input when
// SSE2 is not compiled in).
1075 #endif // __SSE2__
1076 while (message_current < message_end) {
1077 if (*message_current != '\n') {
1078 ++message_current;
1079 continue;
1081 const size_t relative_idx = message_current - message_start;
1082 const size_t message_current_idx = 1 + base_idx + relative_idx;
1083 lines_.push_back(std::make_pair(last_slash_n_idx_,
1084 message_current_idx));
1085 if (lines_.size() == 1) {
1086 headers_->WriteFromFramer(checkpoint,
1087 1 + message_current - checkpoint);
1088 checkpoint = message_current + 1;
1089 const char* begin = headers_->OriginalHeaderStreamBegin();
1090 #if DEBUGFRAMER
1091 LOG(INFO) << "First line " << std::string(begin, lines_[0].second);
1092 LOG(INFO) << "is_request_: " << is_request_;
1093 #endif
1094 ProcessFirstLine(begin, begin + lines_[0].second);
1095 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)
1096 goto process_lines;
1097 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)
1098 goto bottom;
1100 const size_t chars_since_last_slash_n = (message_current_idx -
1101 last_slash_n_idx_);
1102 last_slash_n_idx_ = message_current_idx;
1103 if (chars_since_last_slash_n > 2) {
1104 // false positive.
1105 ++message_current;
1106 continue;
1108 if ((chars_since_last_slash_n == 1) ||
1109 (((message_current > message_start) &&
1110 (*(message_current - 1) == '\r')) ||
1111 (last_char_was_slash_r_))) {
1112 goto process_lines;
1114 ++message_current;
// All input was scanned without finding the end of the headers. The outer
// loop condition is now false, so control leaves the loop and reaches the
// bookkeeping that follows it.
1117 continue;
// Reached only via goto, once the end of the header block has been found or
// the first line alone already completed the message.
1118 process_lines:
1119 ++message_current;
1120 DCHECK(message_current >= message_start);
1121 if (message_current > message_start) {
1122 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
1125 // Check if we have exceeded maximum headers length
1126 // Although we check for this limit before and after we call this function
1127 // we check it here as well to make sure that in case the visitor changed
1128 // the max_header_length_ (for example after processing the first line)
1129 // we handle it gracefully.
1130 if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {
1131 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
1132 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;
1133 visitor_->HandleHeaderError(this);
1134 goto bottom;
1137 // Since we know that we won't be writing any more bytes of the header,
1138 // we tell that to the headers object. The headers object may make
1139 // more efficient allocation decisions when this is signaled.
1140 headers_->DoneWritingFromFramer();
1142 const char* readable_ptr = NULL;
1143 size_t readable_size = 0;
1144 headers_->GetReadablePtrFromHeaderStream(&readable_ptr, &readable_size);
1145 visitor_->ProcessHeaderInput(readable_ptr, readable_size);
1148 // Ok, now that we've written everything into our header buffer, it is
1149 // time to process the header lines (extract proper values for headers
1150 // which are important for framing).
1151 ProcessHeaderLines();
1152 if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
1153 goto bottom;
1155 AssignParseStateAfterHeadersHaveBeenParsed();
1156 if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
1157 goto bottom;
1159 visitor_->ProcessHeaders(*headers_);
1160 visitor_->HeaderDone();
1161 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
1162 visitor_->MessageDone();
1164 goto bottom;
1166 // If we've gotten to here, it means that we've consumed all of the
1167 // available input. We need to record whether or not the last character we
1168 // saw was a '\r' so that a subsequent call to ProcessInput correctly finds
1169 // a header framing that is split across the two calls.
1170 last_char_was_slash_r_ = (*(message_end - 1) == '\r');
1171 DCHECK(message_current >= message_start);
1172 if (message_current > message_start) {
1173 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
// Single exit point. The count is measured from original_message_start
// because message_start may have been advanced past skipped newlines.
1175 bottom:
1176 return message_current - original_message_start;
1180 size_t BalsaFrame::BytesSafeToSplice() const {
1181 switch (parse_state_) {
1182 case BalsaFrameEnums::READING_CHUNK_DATA:
1183 return chunk_length_remaining_;
1184 case BalsaFrameEnums::READING_UNTIL_CLOSE:
1185 return std::numeric_limits<size_t>::max();
1186 case BalsaFrameEnums::READING_CONTENT:
1187 return content_length_remaining_;
1188 default:
1189 return 0;
1193 void BalsaFrame::BytesSpliced(size_t bytes_spliced) {
1194 switch (parse_state_) {
1195 case BalsaFrameEnums::READING_CHUNK_DATA:
1196 if (chunk_length_remaining_ >= bytes_spliced) {
1197 chunk_length_remaining_ -= bytes_spliced;
1198 if (chunk_length_remaining_ == 0) {
1199 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
1201 return;
1202 } else {
1203 last_error_ =
1204 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;
1205 goto error_exit;
1208 case BalsaFrameEnums::READING_UNTIL_CLOSE:
1209 return;
1211 case BalsaFrameEnums::READING_CONTENT:
1212 if (content_length_remaining_ >= bytes_spliced) {
1213 content_length_remaining_ -= bytes_spliced;
1214 if (content_length_remaining_ == 0) {
1215 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1216 visitor_->MessageDone();
1218 return;
1219 } else {
1220 last_error_ =
1221 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;
1222 goto error_exit;
1225 default:
1226 last_error_ = BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO;
1227 goto error_exit;
1230 error_exit:
1231 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
1232 visitor_->HandleBodyError(this);
1235 // You may note that the state-machine contained within this function has both
1236 // switch and goto labels for nearly the same thing. For instance, the
1237 // following two labels refer to the same code block:
1238 // label_reading_chunk_data:
1239 // case BalsaFrameEnums::READING_CHUNK_DATA:
1240 // The 'case' statement is required for the switch statement which occurs when
1241 // ProcessInput is invoked. The goto label is required as the state-machine
1242 // does not use a computed goto in any subsequent operations.
1244 // Since several states exit the state machine for various reasons, there is
1245 // also one label at the bottom of the function. When it is appropriate to
1246 // return from the function, that part of the state machine instead issues a
1247 // goto bottom; This results in less code duplication, and makes debugging
1248 // easier (as you can add a statement to a section of code which is guaranteed
1249 // to be invoked when the function is exiting.
1250 size_t BalsaFrame::ProcessInput(const char* input, size_t size) {
1251 const char* current = input;
1252 const char* on_entry = current;
1253 const char* end = current + size;
1254 #if DEBUGFRAMER
1255 LOG(INFO) << "\n=============="
1256 << BalsaFrameEnums::ParseStateToString(parse_state_)
1257 << "===============\n";
1258 #endif // DEBUGFRAMER
1260 DCHECK(headers_ != NULL);
1261 if (headers_ == NULL) return 0;
1263 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
1264 const size_t header_length = headers_->GetReadableBytesFromHeaderStream();
1265 // Yes, we still have to check this here as the user can change the
1266 // max_header_length amount!
1267 // Also it is possible that we have reached the maximum allowed header size,
1268 // and we have more to consume (remember we are still inside
1269 // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error.
1270 if (header_length > max_header_length_ ||
1271 (header_length == max_header_length_ && size > 0)) {
1272 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
1273 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;
1274 visitor_->HandleHeaderError(this);
1275 goto bottom;
1277 size_t bytes_to_process = max_header_length_ - header_length;
1278 if (bytes_to_process > size) {
1279 bytes_to_process = size;
1281 current += ProcessHeaders(input, bytes_to_process);
1282 // If we are still reading headers check if we have crossed the headers
1283 // limit. Note that we check for >= as opposed to >. This is because if
1284 // header_length_after equals max_header_length_ and we are still in the
1285 // parse_state_ BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for
1286 // sure that the headers limit will be crossed later on
1287 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
1288 // Note that headers_ is valid only if we are still reading headers.
1289 const size_t header_length_after =
1290 headers_->GetReadableBytesFromHeaderStream();
1291 if (header_length_after >= max_header_length_) {
1292 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
1293 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;
1294 visitor_->HandleHeaderError(this);
1297 goto bottom;
1298 } else if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ ||
1299 parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
1300 // Can do nothing more 'till we're reset.
1301 goto bottom;
1304 while (current < end) {
1305 switch (parse_state_) {
1306 label_reading_chunk_length:
1307 case BalsaFrameEnums::READING_CHUNK_LENGTH:
1308 // In this state we read the chunk length.
1309 // Note that once we hit a character which is not in:
1310 // [0-9;A-Fa-f\n], we transition to a different state.
1313 // If we used strtol, etc, we'd have to buffer this line.
1314 // This is more annoying than simply doing the conversion
1315 // here. This code accounts for overflow.
1316 static const signed char buf[] = {
1317 // %0 %1 %2 %3 %4 %5 %6 %7 %8 \t \n %b %c \r %e %f
1318 -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1,
1319 // %10 %11 %12 %13 %14 %15 %16 %17 %18 %19 %1a %1b %1c %1d %1e %1f
1320 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1321 // ' ' %21 %22 %23 %24 %25 %26 %27 %28 %29 %2a %2b %2c %2d %2e %2f
1322 -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1323 // %30 %31 %32 %33 %34 %35 %36 %37 %38 %39 %3a ';' %3c %3d %3e %3f
1324 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -2, -1, -1, -1, -1,
1325 // %40 'A' 'B' 'C' 'D' 'E' 'F' %47 %48 %49 %4a %4b %4c %4d %4e %4f
1326 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1327 // %50 %51 %52 %53 %54 %55 %56 %57 %58 %59 %5a %5b %5c %5d %5e %5f
1328 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1329 // %60 'a' 'b' 'c' 'd' 'e' 'f' %67 %68 %69 %6a %6b %6c %6d %6e %6f
1330 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1331 // %70 %71 %72 %73 %74 %75 %76 %77 %78 %79 %7a %7b %7c %7d %7e %7f
1332 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1334 // valid cases:
1335 // "09123\n" // -> 09123
1336 // "09123\r\n" // -> 09123
1337 // "09123 \n" // -> 09123
1338 // "09123 \r\n" // -> 09123
1339 // "09123 12312\n" // -> 09123
1340 // "09123 12312\r\n" // -> 09123
1341 // "09123; foo=bar\n" // -> 09123
1342 // "09123; foo=bar\r\n" // -> 09123
1343 // "FFFFFFFFFFFFFFFF\r\n" // -> FFFFFFFFFFFFFFFF
1344 // "FFFFFFFFFFFFFFFF 22\r\n" // -> FFFFFFFFFFFFFFFF
1345 // invalid cases:
1346 // "[ \t]+[^\n]*\n"
1347 // "FFFFFFFFFFFFFFFFF\r\n" (would overflow)
1348 // "\r\n"
1349 // "\n"
1350 while (current < end) {
1351 const char c = *current;
1352 ++current;
1353 const signed char addition = buf[static_cast<int>(c)];
1354 if (addition >= 0) {
1355 chunk_length_character_extracted_ = true;
1356 size_t length_x_16 = chunk_length_remaining_ * 16;
1357 const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16;
1358 if ((chunk_length_remaining_ > kMaxDiv16) ||
1359 ((std::numeric_limits<size_t>::max() - length_x_16) <
1360 static_cast<size_t>(addition))) {
1361 // overflow -- asked for a chunk-length greater than 2^64 - 1!!
1362 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
1363 last_error_ = BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW;
1364 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1365 visitor_->HandleChunkingError(this);
1366 goto bottom;
1368 chunk_length_remaining_ = length_x_16 + addition;
1369 continue;
1372 if (!chunk_length_character_extracted_ || addition == -1) {
1373 // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no
1374 // characters were converted, or an unexpected character was
1375 // seen.
1376 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
1377 last_error_ = BalsaFrameEnums::INVALID_CHUNK_LENGTH;
1378 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1379 visitor_->HandleChunkingError(this);
1380 goto bottom;
1383 --current;
1384 parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;
1385 visitor_->ProcessChunkLength(chunk_length_remaining_);
1386 goto label_reading_chunk_extension;
1389 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1390 goto bottom; // case BalsaFrameEnums::READING_CHUNK_LENGTH
1392 label_reading_chunk_extension:
1393 case BalsaFrameEnums::READING_CHUNK_EXTENSION:
1395 // TODO(phython): Convert this scanning to be 16 bytes at a time if
1396 // there is data to be read.
1397 const char* extensions_start = current;
1398 size_t extensions_length = 0;
1399 while (current < end) {
1400 const char c = *current;
1401 if (c == '\r' || c == '\n') {
1402 extensions_length =
1403 (extensions_start == current) ?
1405 current - extensions_start - 1;
1408 ++current;
1409 if (c == '\n') {
1410 chunk_length_character_extracted_ = false;
1411 visitor_->ProcessChunkExtensions(
1412 extensions_start, extensions_length);
1413 if (chunk_length_remaining_ != 0) {
1414 parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;
1415 goto label_reading_chunk_data;
1417 HeaderFramingFound('\n');
1418 parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;
1419 goto label_reading_last_chunk_term;
1422 visitor_->ProcessChunkExtensions(
1423 extensions_start, extensions_length);
1426 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1427 goto bottom; // case BalsaFrameEnums::READING_CHUNK_EXTENSION
1429 label_reading_chunk_data:
1430 case BalsaFrameEnums::READING_CHUNK_DATA:
1431 while (current < end) {
1432 if (chunk_length_remaining_ == 0) {
1433 break;
1435 // read in the chunk
1436 size_t bytes_remaining = end - current;
1437 size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining) ?
1438 chunk_length_remaining_ : bytes_remaining;
1439 const char* tmp_current = current + consumed_bytes;
1440 visitor_->ProcessBodyInput(on_entry, tmp_current - on_entry);
1441 visitor_->ProcessBodyData(current, consumed_bytes);
1442 on_entry = current = tmp_current;
1443 chunk_length_remaining_ -= consumed_bytes;
1445 if (chunk_length_remaining_ == 0) {
1446 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
1447 goto label_reading_chunk_term;
1449 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1450 goto bottom; // case BalsaFrameEnums::READING_CHUNK_DATA
1452 label_reading_chunk_term:
1453 case BalsaFrameEnums::READING_CHUNK_TERM:
1454 while (current < end) {
1455 const char c = *current;
1456 ++current;
1458 if (c == '\n') {
1459 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
1460 goto label_reading_chunk_length;
1463 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1464 goto bottom; // case BalsaFrameEnums::READING_CHUNK_TERM
1466 label_reading_last_chunk_term:
1467 case BalsaFrameEnums::READING_LAST_CHUNK_TERM:
1468 while (current < end) {
1469 const char c = *current;
1471 if (!HeaderFramingFound(c)) {
1472 // If not, however, since the spec only suggests that the
1473 // client SHOULD indicate the presence of trailers, we get to
1474 // *test* that they did or didn't.
1475 // If all of the bytes we've seen since:
1476 // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF
1477 // are either '\r', or '\n', then we can assume that we don't yet
1478 // know if we need to parse headers, or if the next byte will make
1479 // the HeaderFramingFound condition (above) true.
1480 if (HeaderFramingMayBeFound()) {
1481 // If true, then we have seen only characters '\r' or '\n'.
1482 ++current;
1484 // Lets try again! There is no state change here.
1485 continue;
1486 } else {
1487 // If (!HeaderFramingMayBeFound()), then we know that we must be
1488 // reading the first non CRLF character of a trailer.
1489 parse_state_ = BalsaFrameEnums::READING_TRAILER;
1490 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1491 on_entry = current;
1492 goto label_reading_trailer;
1494 } else {
1495 // If we've found a "\r\n\r\n", then the message
1496 // is done.
1497 ++current;
1498 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1499 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1500 visitor_->MessageDone();
1501 goto bottom;
1503 break; // from while loop
1505 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1506 goto bottom; // case BalsaFrameEnums::READING_LAST_CHUNK_TERM
1508 label_reading_trailer:
1509 case BalsaFrameEnums::READING_TRAILER:
1510 while (current < end) {
1511 const char c = *current;
1512 ++current;
1513 // TODO(fenix): If we ever care about trailers as part of framing,
1514 // deal with them here (see below for part of the 'solution')
1515 // if (LineFramingFound(c)) {
1516 // trailer_lines_.push_back(make_pair(start_of_line_,
1517 // trailer_length_ - 1));
1518 // start_of_line_ = trailer_length_;
1519 // }
1520 if (HeaderFramingFound(c)) {
1521 // ProcessTrailers(visitor_, &trailers_);
1522 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1523 visitor_->ProcessTrailerInput(on_entry, current - on_entry);
1524 visitor_->MessageDone();
1525 goto bottom;
1528 visitor_->ProcessTrailerInput(on_entry, current - on_entry);
1529 break; // case BalsaFrameEnums::READING_TRAILER
1531 // Note that there is no label:
1532 // 'label_reading_until_close'
1533 // here. This is because the state-machine exists immediately after
1534 // reading the headers instead of transitioning here (as it would
1535 // do if it was consuming all the data it could, all the time).
1536 case BalsaFrameEnums::READING_UNTIL_CLOSE:
1538 const size_t bytes_remaining = end - current;
1539 if (bytes_remaining > 0) {
1540 visitor_->ProcessBodyInput(current, bytes_remaining);
1541 visitor_->ProcessBodyData(current, bytes_remaining);
1542 current += bytes_remaining;
1545 goto bottom; // case BalsaFrameEnums::READING_UNTIL_CLOSE
1547 // label_reading_content:
1548 case BalsaFrameEnums::READING_CONTENT:
1549 #if DEBUGFRAMER
1550 LOG(INFO) << "ReadingContent: " << content_length_remaining_;
1551 #endif // DEBUGFRAMER
1552 while (content_length_remaining_ && current < end) {
1553 // read in the content
1554 const size_t bytes_remaining = end - current;
1555 const size_t consumed_bytes =
1556 (content_length_remaining_ < bytes_remaining) ?
1557 content_length_remaining_ : bytes_remaining;
1558 visitor_->ProcessBodyInput(current, consumed_bytes);
1559 visitor_->ProcessBodyData(current, consumed_bytes);
1560 current += consumed_bytes;
1561 content_length_remaining_ -= consumed_bytes;
1563 if (content_length_remaining_ == 0) {
1564 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1565 visitor_->MessageDone();
1567 goto bottom; // case BalsaFrameEnums::READING_CONTENT
1569 default:
1570 // The state-machine should never be in a state that isn't handled
1571 // above. This is a glaring logic error, and we should do something
1572 // drastic to ensure that this gets looked-at and fixed.
1573 LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE
1574 << " memory corruption?!"; // COV_NF_LINE
1577 bottom:
1578 #if DEBUGFRAMER
1579 LOG(INFO) << "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n"
1580 << std::string(input, current)
1581 << "\n$$$$$$$$$$$$$$"
1582 << BalsaFrameEnums::ParseStateToString(parse_state_)
1583 << "$$$$$$$$$$$$$$$"
1584 << " consumed: " << (current - input);
1585 if (Error()) {
1586 LOG(INFO) << BalsaFrameEnums::ErrorCodeToString(ErrorCode());
1588 #endif // DEBUGFRAMER
1589 return current - input;
// Namespace-scope definitions of BalsaFrame's static constant members. No
// initializers are given here, so the values are presumably supplied where
// the members are declared (not visible in this part of the file) -- TODO
// confirm against the class declaration.
1592 const uint32 BalsaFrame::kValidTerm1;
1593 const uint32 BalsaFrame::kValidTerm1Mask;
1594 const uint32 BalsaFrame::kValidTerm2;
1595 const uint32 BalsaFrame::kValidTerm2Mask;
1597 } // namespace net