2 * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
5 * The software is licensed under either the MIT License (below) or the Perl
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to
10 * deal in the Software without restriction, including without limitation the
11 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12 * sell copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 # include <nmmintrin.h>
34 # include <x86intrin.h>
37 #include "picohttpparser.h"
40 # define likely(x) __builtin_expect(!!(x), 1)
41 # define unlikely(x) __builtin_expect(!!(x), 0)
43 # define likely(x) (x)
44 # define unlikely(x) (x)
48 # define ALIGNED(n) _declspec(align(n))
50 # define ALIGNED(n) __attribute__((aligned(n)))
/* Non-zero when `c`, viewed as an unsigned byte, lies in 0x20..0x7e (SP through '~').
 * Bytes below 0x20 wrap around to a large unsigned value after the subtraction, so a
 * single unsigned comparison checks both ends of the range. `c` is evaluated once. */
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c) - 0x20u < 0x5fu)
56 if (buf == buf_end) { \
61 #define EXPECT_CHAR_NO_CHECK(ch) \
67 #define EXPECT_CHAR(ch) \
69 EXPECT_CHAR_NO_CHECK(ch);
71 #define ADVANCE_TOKEN(tok, toklen) \
73 const char *tok_start = buf; \
74 static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \
76 buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \
83 } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
84 if ((unsigned char)*buf < '\040' || *buf == '\177') { \
93 toklen = buf - tok_start; \
/* Lookup table indexed by byte value (cast to unsigned char before indexing):
 * 1 marks a byte accepted as a token character, 0 marks every other byte.
 * The accepted set is the ASCII letters and digits plus
 *   ! # $ % & ' * + - . ^ _ ` | ~
 * Control characters, SP, DEL and all bytes >= 0x80 map to 0. */
static const char *token_char_map =
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" /* 0x00-0x0f: control characters */
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" /* 0x10-0x1f: control characters */
    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0" /* 0x20-0x2f: SP ! " # $ % & ' ( ) * + , - . / */
    "\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0" /* 0x30-0x3f: 0-9 : ; < = > ? */
    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1" /* 0x40-0x4f: @ A-O */
    "\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1" /* 0x50-0x5f: P-Z [ \ ] ^ _ */
    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1" /* 0x60-0x6f: ` a-o */
    "\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0" /* 0x70-0x7f: p-z { | } ~ DEL */
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" /* 0x80-0x8f */
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" /* 0x90-0x9f */
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" /* 0xa0-0xaf */
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" /* 0xb0-0xbf */
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" /* 0xc0-0xcf */
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" /* 0xd0-0xdf */
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" /* 0xe0-0xef */
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; /* 0xf0-0xff */
105 static const char *findchar_fast(const char *buf
, const char *buf_end
, const char *ranges
, size_t ranges_size
, int *found
) {
108 if (likely(buf_end
- buf
>= 16)) {
109 __m128i ranges16
= _mm_loadu_si128((const __m128i
*)ranges
);
111 size_t left
= (buf_end
- buf
) & ~15;
113 __m128i b16
= _mm_loadu_si128((const __m128i
*)buf
);
114 int r
= _mm_cmpestri(ranges16
, ranges_size
, b16
, 16, _SIDD_LEAST_SIGNIFICANT
| _SIDD_CMP_RANGES
| _SIDD_UBYTE_OPS
);
115 if (unlikely(r
!= 16)) {
122 } while (likely(left
!= 0));
125 /* suppress unused parameter warning */
133 static const char *get_token_to_eol(const char *buf
, const char *buf_end
, const char **token
, size_t *token_len
, int *ret
) {
134 const char *token_start
= buf
;
137 static const char ALIGNED(16) ranges1
[16] = "\0\010" /* allow HT */
138 "\012\037" /* allow SP and up to but not including DEL */
139 "\177\177"; /* allow chars w. MSB set */
141 buf
= findchar_fast(buf
, buf_end
, ranges1
, 6, &found
);
145 /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
146 while (likely(buf_end
- buf
>= 8)) {
149 if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
164 if ((likely((unsigned char)*buf
< '\040') && likely(*buf
!= '\011')) || unlikely(*buf
== '\177')) {
172 if (unlikely(!IS_PRINTABLE_ASCII(*buf
))) {
173 if ((likely((unsigned char)*buf
< '\040') && likely(*buf
!= '\011')) || unlikely(*buf
== '\177')) {
179 if (likely(*buf
== '\015')) {
182 *token_len
= buf
- 2 - token_start
;
183 } else if (*buf
== '\012') {
184 *token_len
= buf
- token_start
;
190 *token
= token_start
;
195 static const char *is_complete(const char *buf
, const char *buf_end
, size_t last_len
, int *ret
) {
197 buf
= last_len
< 3 ? buf
: buf
+ last_len
- 3;
201 if (*buf
== '\015') {
206 } else if (*buf
== '\012') {
222 #define PARSE_INT(valp_, mul_) \
223 if (*buf < '0' || '9' < *buf) { \
228 *(valp_) = (mul_) * (*buf++ - '0');
230 #define PARSE_INT_3(valp_) \
233 PARSE_INT(&res_, 100) \
235 PARSE_INT(&res_, 10) \
237 PARSE_INT(&res_, 1) \
241 /* returned pointer is always within [buf, buf_end), or null */
242 static const char *parse_http_version(const char *buf
, const char *buf_end
, int *major_version
, int *minor_version
, int *ret
) {
243 /* we want at least [HTTP/1.<two chars>] to try to parse */
244 if (buf_end
- buf
< 9) {
248 EXPECT_CHAR_NO_CHECK('H');
249 EXPECT_CHAR_NO_CHECK('T');
250 EXPECT_CHAR_NO_CHECK('T');
251 EXPECT_CHAR_NO_CHECK('P');
252 EXPECT_CHAR_NO_CHECK('/');
253 PARSE_INT(major_version
, 1);
254 if (*major_version
== 1) {
255 EXPECT_CHAR_NO_CHECK('.');
256 PARSE_INT(minor_version
, 1);
263 static const char *parse_headers(const char *buf
, const char *buf_end
, struct phr_header
*headers
, size_t *num_headers
, size_t max_headers
,
265 for (;; ++*num_headers
) {
267 if (*buf
== '\015') {
271 } else if (*buf
== '\012') {
275 if (*num_headers
== max_headers
) {
279 if (!(*num_headers
!= 0 && (*buf
== ' ' || *buf
== '\t'))) {
280 /* parsing name, but do not discard SP before colon, see
281 * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
282 headers
[*num_headers
].name
= buf
;
283 static const char ALIGNED(16) ranges1
[] = "\x00 " /* control chars and up to SP */
290 "{\377"; /* 0x7b-0xff */
292 buf
= findchar_fast(buf
, buf_end
, ranges1
, sizeof(ranges1
) - 1, &found
);
299 } else if (!token_char_map
[(unsigned char)*buf
]) {
306 if ((headers
[*num_headers
].name_len
= buf
- headers
[*num_headers
].name
) == 0) {
313 if (!(*buf
== ' ' || *buf
== '\t')) {
318 headers
[*num_headers
].name
= NULL
;
319 headers
[*num_headers
].name_len
= 0;
323 if ((buf
= get_token_to_eol(buf
, buf_end
, &value
, &value_len
, ret
)) == NULL
) {
326 /* remove trailing SPs and HTABs */
327 const char *value_end
= value
+ value_len
;
328 for (; value_end
!= value
; --value_end
) {
329 const char c
= *(value_end
- 1);
330 if (!(c
== ' ' || c
== '\t')) {
334 headers
[*num_headers
].value
= value
;
335 headers
[*num_headers
].value_len
= value_end
- value
;
340 static const char *parse_request(const char *buf
, const char *buf_end
, const char **method
, size_t *method_len
, const char **path
,
341 size_t *path_len
, int *major_version
, int *minor_version
, struct phr_header
*headers
, size_t *num_headers
,
342 size_t max_headers
, int *ret
) {
343 /* skip first empty line (some clients add CRLF after POST content) */
345 if (*buf
== '\015') {
348 } else if (*buf
== '\012') {
352 /* parse request line */
353 ADVANCE_TOKEN(*method
, *method_len
);
356 } while (*buf
== ' ');
357 ADVANCE_TOKEN(*path
, *path_len
);
360 } while (*buf
== ' ');
361 if (*method_len
== 0 || *path_len
== 0) {
365 if ((buf
= parse_http_version(buf
, buf_end
, major_version
, minor_version
, ret
)) == NULL
) {
368 if (*buf
== '\015') {
371 } else if (*buf
== '\012') {
378 return parse_headers(buf
, buf_end
, headers
, num_headers
, max_headers
, ret
);
381 int phr_parse_request(const char *buf_start
, size_t len
, const char **method
, size_t *method_len
, const char **path
, size_t *path_len
,
382 int *major_version
, int *minor_version
, struct phr_header
*headers
, size_t *num_headers
, size_t last_len
) {
383 const char *buf
= buf_start
, *buf_end
= buf_start
+ len
;
384 size_t max_headers
= *num_headers
;
395 /* if last_len != 0, check if the request is complete (a fast countermeasure
397 if (last_len
!= 0 && is_complete(buf
, buf_end
, last_len
, &r
) == NULL
) {
401 if ((buf
= parse_request(buf
, buf_end
, method
, method_len
, path
, path_len
, major_version
, minor_version
, headers
, num_headers
,
402 max_headers
, &r
)) == NULL
) {
406 return (int)(buf
- buf_start
);
409 static const char *parse_response(const char *buf
, const char *buf_end
, int *major_version
, int *minor_version
, int *status
,
410 const char **msg
, size_t *msg_len
, struct phr_header
*headers
, size_t *num_headers
, size_t max_headers
,
412 /* parse "HTTP/1.x" */
413 if ((buf
= parse_http_version(buf
, buf_end
, major_version
, minor_version
, ret
)) == NULL
) {
423 } while (*buf
== ' ');
424 /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
425 if (buf_end
- buf
< 4) {
431 /* get message including preceding space */
432 if ((buf
= get_token_to_eol(buf
, buf_end
, msg
, msg_len
, ret
)) == NULL
) {
437 } else if (**msg
== ' ') {
438 /* remove preceding space */
442 } while (**msg
== ' ');
444 /* garbage found after status code */
449 return parse_headers(buf
, buf_end
, headers
, num_headers
, max_headers
, ret
);
452 int phr_parse_response(const char *buf_start
, size_t len
, int *major_version
, int *minor_version
, int *status
, const char **msg
,
453 size_t *msg_len
, struct phr_header
*headers
, size_t *num_headers
, size_t last_len
) {
454 const char *buf
= buf_start
, *buf_end
= buf
+ len
;
455 size_t max_headers
= *num_headers
;
465 /* if last_len != 0, check if the response is complete (a fast countermeasure
467 if (last_len
!= 0 && is_complete(buf
, buf_end
, last_len
, &r
) == NULL
) {
471 if ((buf
= parse_response(buf
, buf_end
, major_version
, minor_version
, status
, msg
, msg_len
, headers
, num_headers
, max_headers
, &r
)) ==
476 return (int)(buf
- buf_start
);
479 int phr_parse_headers(const char *buf_start
, size_t len
, struct phr_header
*headers
, size_t *num_headers
, size_t last_len
) {
480 const char *buf
= buf_start
, *buf_end
= buf
+ len
;
481 size_t max_headers
= *num_headers
;
486 /* if last_len != 0, check if the response is complete (a fast countermeasure
488 if (last_len
!= 0 && is_complete(buf
, buf_end
, last_len
, &r
) == NULL
) {
492 if ((buf
= parse_headers(buf
, buf_end
, headers
, num_headers
, max_headers
, &r
)) == NULL
) {
496 return (int)(buf
- buf_start
);
500 CHUNKED_IN_CHUNK_SIZE
,
501 CHUNKED_IN_CHUNK_EXT
,
502 CHUNKED_IN_CHUNK_DATA
,
503 CHUNKED_IN_CHUNK_CRLF
,
504 CHUNKED_IN_TRAILERS_LINE_HEAD
,
505 CHUNKED_IN_TRAILERS_LINE_MIDDLE
508 static int decode_hex(int ch
) {
509 if ('0' <= ch
&& ch
<= '9') {
511 } else if ('A' <= ch
&& ch
<= 'F') {
512 return ch
- 'A' + 0xa;
513 } else if ('a' <= ch
&& ch
<= 'f') {
514 return ch
- 'a' + 0xa;
520 ssize_t
phr_decode_chunked(struct phr_chunked_decoder
*decoder
, char *buf
, size_t *_bufsz
) {
521 size_t dst
= 0, src
= 0, bufsz
= *_bufsz
;
522 ssize_t ret
= -2; /* incomplete */
525 switch (decoder
->_state
) {
526 case CHUNKED_IN_CHUNK_SIZE
:
531 if ((v
= decode_hex(buf
[src
])) == -1) {
532 if (decoder
->_hex_count
== 0) {
538 if (decoder
->_hex_count
== sizeof(size_t) * 2) {
542 decoder
->bytes_left_in_chunk
= decoder
->bytes_left_in_chunk
* 16 + v
;
543 ++decoder
->_hex_count
;
545 decoder
->_hex_count
= 0;
546 decoder
->_state
= CHUNKED_IN_CHUNK_EXT
;
548 case CHUNKED_IN_CHUNK_EXT
:
549 /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
553 if (buf
[src
] == '\012')
557 if (decoder
->bytes_left_in_chunk
== 0) {
558 if (decoder
->consume_trailer
) {
559 decoder
->_state
= CHUNKED_IN_TRAILERS_LINE_HEAD
;
565 decoder
->_state
= CHUNKED_IN_CHUNK_DATA
;
567 case CHUNKED_IN_CHUNK_DATA
: {
568 size_t avail
= bufsz
- src
;
569 if (avail
< decoder
->bytes_left_in_chunk
) {
571 memmove(buf
+ dst
, buf
+ src
, avail
);
574 decoder
->bytes_left_in_chunk
-= avail
;
578 memmove(buf
+ dst
, buf
+ src
, decoder
->bytes_left_in_chunk
);
579 src
+= decoder
->bytes_left_in_chunk
;
580 dst
+= decoder
->bytes_left_in_chunk
;
581 decoder
->bytes_left_in_chunk
= 0;
582 decoder
->_state
= CHUNKED_IN_CHUNK_CRLF
;
585 case CHUNKED_IN_CHUNK_CRLF
:
589 if (buf
[src
] != '\015')
592 if (buf
[src
] != '\012') {
597 decoder
->_state
= CHUNKED_IN_CHUNK_SIZE
;
599 case CHUNKED_IN_TRAILERS_LINE_HEAD
:
603 if (buf
[src
] != '\015')
606 if (buf
[src
++] == '\012')
608 decoder
->_state
= CHUNKED_IN_TRAILERS_LINE_MIDDLE
;
610 case CHUNKED_IN_TRAILERS_LINE_MIDDLE
:
614 if (buf
[src
] == '\012')
618 decoder
->_state
= CHUNKED_IN_TRAILERS_LINE_HEAD
;
621 assert(!"decoder is corrupt");
629 memmove(buf
+ dst
, buf
+ src
, bufsz
- src
);
634 int phr_decode_chunked_is_in_data(struct phr_chunked_decoder
*decoder
) { return decoder
->_state
== CHUNKED_IN_CHUNK_DATA
; }