Merge pull request #2045 from RincewindsHat/fix/calloc_argument_order
[monitoring-plugins.git] / plugins / picohttpparser / picohttpparser.c
blob2ae92d66dc1c02f7247cb8c935924930e640eb69
/*
 * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
 *                         Shigeo Mitsunari
 *
 * The software is licensed under either the MIT License (below) or the Perl
 * license.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
27 #include <assert.h>
28 #include <stddef.h>
29 #include <string.h>
30 #ifdef __SSE4_2__
31 # ifdef _MSC_VER
32 # include <nmmintrin.h>
33 # else
34 # include <x86intrin.h>
35 # endif
36 #endif
37 #include "picohttpparser.h"
/*
 * Branch-prediction hints. With GCC 3 or newer they map to
 * __builtin_expect(); elsewhere they expand to the bare expression.
 */
#if defined(__GNUC__) && __GNUC__ >= 3
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif

/* ALIGNED(n): compiler-specific request to align the declared object to n bytes. */
#if defined(_MSC_VER)
#define ALIGNED(n) _declspec(align(n))
#else
#define ALIGNED(n) __attribute__((aligned(n)))
#endif

/*
 * True for bytes 0x20 (SP) through 0x7e ('~'). The single unsigned
 * comparison works because values below 0x20 wrap around to large numbers.
 */
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c) - 0x20u < 0x5fu)
/*
 * Parser control macros. They rely on three names being in scope in the
 * function that uses them: `buf` (read cursor), `buf_end` (one past the last
 * readable byte) and `ret` (int * that receives the status code). On failure
 * they store the status in *ret and return NULL from the enclosing function.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly one
 * statement and stays safe under an unbraced if/else.
 */

/* Fails with -2 when the cursor has reached the end of the buffer. */
#define CHECK_EOF()                                                            \
    do {                                                                       \
        if (buf == buf_end) {                                                  \
            *ret = -2;                                                         \
            return NULL;                                                       \
        }                                                                      \
    } while (0)

/*
 * Consumes one byte and fails with -1 unless it equals `ch`. The caller must
 * already know that at least one byte is readable.
 */
#define EXPECT_CHAR_NO_CHECK(ch)                                               \
    do {                                                                       \
        if (*buf++ != (ch)) {                                                  \
            *ret = -1;                                                         \
            return NULL;                                                       \
        }                                                                      \
    } while (0)

/* Bounds-checked variant: -2 at end of buffer, -1 on a mismatching byte. */
#define EXPECT_CHAR(ch)                                                        \
    do {                                                                       \
        CHECK_EOF();                                                           \
        EXPECT_CHAR_NO_CHECK(ch);                                              \
    } while (0)
/*
 * Scans a space-terminated token starting at `buf` and stores its start in
 * `tok` and its length in `toklen`. On success `buf` is left pointing at the
 * terminating SP.
 *
 * Control bytes (below 0x20) and DEL abort the enclosing function with
 * *ret = -1; running out of input aborts it with *ret = -2. Bytes with the
 * high bit set are accepted as part of the token.
 *
 * Uses `buf`, `buf_end` and `ret` from the enclosing function.
 */
#define ADVANCE_TOKEN(tok, toklen)                                             \
    do {                                                                       \
        const char *tok_begin_ = buf;                                          \
        static const char ALIGNED(16) ctl_ranges_[16] = "\000\040\177\177";    \
        int hit_;                                                              \
        buf = findchar_fast(buf, buf_end, ctl_ranges_, 4, &hit_);              \
        if (!hit_) {                                                           \
            CHECK_EOF();                                                       \
        }                                                                      \
        while (*buf != ' ') {                                                  \
            if (unlikely(!IS_PRINTABLE_ASCII(*buf)) &&                         \
                ((unsigned char)*buf < '\040' || *buf == '\177')) {            \
                *ret = -1;                                                     \
                return NULL;                                                   \
            }                                                                  \
            ++buf;                                                             \
            CHECK_EOF();                                                       \
        }                                                                      \
        tok = tok_begin_;                                                      \
        toklen = buf - tok_begin_;                                             \
    } while (0)
/*
 * Lookup table indexed by byte value: 1 when the byte is accepted inside a
 * header field name, 0 otherwise. Each string below covers 32 byte values.
 * Accepted bytes are the ASCII letters and digits plus
 * ! # $ % & ' * + - . ^ _ ` | ~
 */
static const char *token_char_map =
    /* 0x00-0x1f */
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    /* 0x20-0x3f: SP, punctuation, digits */
    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
    /* 0x40-0x5f: '@', upper-case letters, [ \ ] ^ _ */
    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
    /* 0x60-0x7f: '`', lower-case letters, { | } ~ DEL */
    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
    /* 0x80-0xff */
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
/*
 * Skips ahead to the first byte that falls inside one of the inclusive byte
 * ranges in `ranges` (pairs of low/high bytes, `ranges_size` bytes in total).
 *
 * With SSE4.2 the input is examined in whole 16-byte chunks; a tail shorter
 * than 16 bytes is left unexamined. On a hit, *found is set to 1 and the
 * pointer to the matching byte is returned. Otherwise *found is 0 and the
 * returned pointer marks where the caller has to continue scanning.
 *
 * Without SSE4.2 this is a no-op: it returns `buf` with *found == 0.
 */
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
{
    *found = 0;
#if __SSE4_2__
    if (likely(buf_end - buf >= 16)) {
        const __m128i range_vec = _mm_loadu_si128((const __m128i *)ranges);
        /* number of bytes that can be consumed in whole 16-byte chunks */
        size_t remaining = (buf_end - buf) & ~15;

        while (1) {
            const __m128i chunk = _mm_loadu_si128((const __m128i *)buf);
            const int idx =
                _mm_cmpestri(range_vec, ranges_size, chunk, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
            if (unlikely(idx != 16)) {
                buf += idx;
                *found = 1;
                break;
            }
            buf += 16;
            remaining -= 16;
            if (unlikely(remaining == 0)) {
                break;
            }
        }
    }
#else
    /* the parameters are only used by the SSE4.2 path; silence warnings */
    (void)buf_end;
    (void)ranges;
    (void)ranges_size;
#endif
    return buf;
}
/*
 * Consumes the rest of the current line.
 *
 * On success *token / *token_len describe the bytes from `buf` up to (not
 * including) the line terminator, and the returned pointer is just past the
 * terminator, which may be CRLF or a bare LF.
 *
 * HT, printable ASCII and bytes with the high bit set are accepted inside the
 * line. Any other control byte, or DEL, ends the scan; unless that byte starts
 * a valid line terminator the function fails with *ret = -1. Running out of
 * input yields *ret = -2. NULL is returned in both failure cases.
 */
static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
{
    const char *line_start = buf;

#ifdef __SSE4_2__
    /* byte ranges that stop the scan: controls other than HT, and DEL */
    static const char ALIGNED(16) eol_ranges[16] = "\0\010"    /* 0x00-0x08; HT (0x09) is allowed */
                                                   "\012\037"  /* 0x0a-0x1f; SP through '~' is allowed */
                                                   "\177\177"; /* DEL; bytes with the MSB set are allowed */
    int hit;
    buf = findchar_fast(buf, buf_end, eol_ranges, 6, &hit);
    if (hit)
        goto ctl_found;
#else
    /* hottest loop of the parser: test 8 bytes per iteration, manually unrolled */
    while (likely(buf_end - buf >= 8)) {
#define STEP_IF_PRINTABLE()                                                    \
    do {                                                                       \
        if (unlikely(!IS_PRINTABLE_ASCII(*buf)))                               \
            goto non_printable;                                                \
        ++buf;                                                                 \
    } while (0)
        STEP_IF_PRINTABLE();
        STEP_IF_PRINTABLE();
        STEP_IF_PRINTABLE();
        STEP_IF_PRINTABLE();
        STEP_IF_PRINTABLE();
        STEP_IF_PRINTABLE();
        STEP_IF_PRINTABLE();
        STEP_IF_PRINTABLE();
#undef STEP_IF_PRINTABLE
        continue;
    non_printable:
        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
            goto ctl_found;
        }
        /* HT or a high-bit byte: part of the line, keep scanning */
        ++buf;
    }
#endif
    /* byte-at-a-time scan of whatever the fast path left over */
    for (;; ++buf) {
        CHECK_EOF();
        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
                goto ctl_found;
            }
        }
    }

ctl_found:
    if (likely(*buf == '\015')) {
        ++buf;
        EXPECT_CHAR('\012');
        /* buf is now past CRLF, so step back over both bytes */
        *token_len = buf - 2 - line_start;
    } else if (*buf == '\012') {
        *token_len = buf - line_start;
        ++buf;
    } else {
        *ret = -1;
        return NULL;
    }
    *token = line_start;

    return buf;
}
/*
 * Checks whether the buffer contains the blank line that terminates a header
 * block, i.e. two consecutive line terminators (each CRLF or a bare LF).
 *
 * `last_len` is the number of bytes the caller had already presented on an
 * earlier attempt. The scan resumes 3 bytes before that offset, so a
 * terminator that straddles the old/new boundary is still detected.
 *
 * Returns a pointer just past the terminating blank line. Returns NULL with
 * *ret = -2 when the terminator has not arrived yet, or NULL with *ret = -1
 * when a CR is followed by a byte other than LF.
 */
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
{
    int ret_cnt = 0; /* consecutive line terminators seen so far */

    if (last_len >= 3) {
        /* never skip past the end of the buffer, even if last_len is bogus */
        const size_t skip = last_len - 3;
        const size_t avail = (size_t)(buf_end - buf);
        buf += skip < avail ? skip : avail;
    }

    while (1) {
        CHECK_EOF();
        if (*buf == '\015') {
            ++buf;
            EXPECT_CHAR('\012'); /* performs its own end-of-buffer check */
            ++ret_cnt;
        } else if (*buf == '\012') {
            ++buf;
            ++ret_cnt;
        } else {
            ++buf;
            ret_cnt = 0;
        }
        if (ret_cnt == 2) {
            return buf;
        }
    }
}
/*
 * Reads one decimal digit at `buf`, stores digit * mul_ in *valp_ and
 * advances `buf`. A non-digit makes the enclosing function return NULL with
 * *ret = -1. The caller must ensure a byte is readable.
 *
 * Wrapped in do { } while (0) so it expands to a single statement.
 */
#define PARSE_INT(valp_, mul_)                                                 \
    do {                                                                       \
        if (*buf < '0' || '9' < *buf) {                                        \
            buf++;                                                             \
            *ret = -1;                                                         \
            return NULL;                                                       \
        }                                                                      \
        *(valp_) = (mul_) * (*buf++ - '0');                                    \
    } while (0)

/*
 * Reads exactly three decimal digits into *valp_. The caller must ensure
 * that three bytes are readable.
 */
#define PARSE_INT_3(valp_)                                                     \
    do {                                                                       \
        int res_ = 0;                                                          \
        PARSE_INT(&res_, 100);                                                 \
        *(valp_) = res_;                                                       \
        PARSE_INT(&res_, 10);                                                  \
        *(valp_) += res_;                                                      \
        PARSE_INT(&res_, 1);                                                   \
        *(valp_) += res_;                                                      \
    } while (0)
/*
 * Parses "HTTP/<digit>" and, when the major version is 1, the following
 * ".<digit>" as well. For any other major version *minor_version is set to 0.
 *
 * At least 9 readable bytes are required before anything is examined;
 * otherwise the result is NULL with *ret = -2. Malformed input gives NULL
 * with *ret = -1. On success the returned pointer is just past the version
 * and always lies within [buf, buf_end).
 */
static const char *parse_http_version(const char *buf, const char *buf_end, int *major_version, int *minor_version, int *ret)
{
    static const char prefix[] = "HTTP/";
    const size_t prefix_len = sizeof(prefix) - 1;

    /* we want at least [HTTP/1.<two chars>] to try to parse */
    if (buf_end - buf < 9) {
        *ret = -2;
        return NULL;
    }
    if (memcmp(buf, prefix, prefix_len) != 0) {
        *ret = -1;
        return NULL;
    }
    buf += prefix_len;

    if (*buf < '0' || *buf > '9') {
        *ret = -1;
        return NULL;
    }
    *major_version = *buf++ - '0';

    if (*major_version != 1) {
        *minor_version = 0;
        return buf;
    }

    if (*buf++ != '.') {
        *ret = -1;
        return NULL;
    }
    if (*buf < '0' || *buf > '9') {
        *ret = -1;
        return NULL;
    }
    *minor_version = *buf++ - '0';
    return buf;
}
263 static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers, size_t max_headers,
264 int *ret) {
265 for (;; ++*num_headers) {
266 CHECK_EOF();
267 if (*buf == '\015') {
268 ++buf;
269 EXPECT_CHAR('\012');
270 break;
271 } else if (*buf == '\012') {
272 ++buf;
273 break;
275 if (*num_headers == max_headers) {
276 *ret = -1;
277 return NULL;
279 if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
280 /* parsing name, but do not discard SP before colon, see
281 * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
282 headers[*num_headers].name = buf;
283 static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */
284 "\"\"" /* 0x22 */
285 "()" /* 0x28,0x29 */
286 ",," /* 0x2c */
287 "//" /* 0x2f */
288 ":@" /* 0x3a-0x40 */
289 "[]" /* 0x5b-0x5d */
290 "{\377"; /* 0x7b-0xff */
291 int found;
292 buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
293 if (!found) {
294 CHECK_EOF();
296 while (1) {
297 if (*buf == ':') {
298 break;
299 } else if (!token_char_map[(unsigned char)*buf]) {
300 *ret = -1;
301 return NULL;
303 ++buf;
304 CHECK_EOF();
306 if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
307 *ret = -1;
308 return NULL;
310 ++buf;
311 for (;; ++buf) {
312 CHECK_EOF();
313 if (!(*buf == ' ' || *buf == '\t')) {
314 break;
317 } else {
318 headers[*num_headers].name = NULL;
319 headers[*num_headers].name_len = 0;
321 const char *value;
322 size_t value_len;
323 if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
324 return NULL;
326 /* remove trailing SPs and HTABs */
327 const char *value_end = value + value_len;
328 for (; value_end != value; --value_end) {
329 const char c = *(value_end - 1);
330 if (!(c == ' ' || c == '\t')) {
331 break;
334 headers[*num_headers].value = value;
335 headers[*num_headers].value_len = value_end - value;
337 return buf;
/*
 * Parses a request line ("METHOD SP path SP HTTP/x.y" followed by CRLF or LF)
 * and then the header block.
 *
 * A single leading empty line is tolerated. *method / *path point into the
 * input buffer and are not NUL-terminated; their lengths are stored in
 * *method_len / *path_len.
 *
 * Returns the position just past the header block. Returns NULL with
 * *ret = -2 when the input ends early, or NULL with *ret = -1 on malformed
 * input.
 */
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
                                 size_t *path_len, int *major_version, int *minor_version, struct phr_header *headers,
                                 size_t *num_headers, size_t max_headers, int *ret)
{
    /* skip first empty line (some clients add CRLF after POST content) */
    CHECK_EOF();
    if (*buf == '\015') {
        ++buf;
        EXPECT_CHAR('\012');
    } else if (*buf == '\012') {
        ++buf;
    }

    /* parse request line */
    ADVANCE_TOKEN(*method, *method_len);
    /* skip the separating spaces; check bounds before every dereference so
     * that input ending right after a space is reported as incomplete
     * instead of being read past its end */
    do {
        ++buf;
        CHECK_EOF();
    } while (*buf == ' ');
    ADVANCE_TOKEN(*path, *path_len);
    do {
        ++buf;
        CHECK_EOF();
    } while (*buf == ' ');
    if (*method_len == 0 || *path_len == 0) {
        *ret = -1;
        return NULL;
    }
    if ((buf = parse_http_version(buf, buf_end, major_version, minor_version, ret)) == NULL) {
        return NULL;
    }
    /* parse_http_version() returns a pointer inside the buffer, so *buf is readable */
    if (*buf == '\015') {
        ++buf;
        EXPECT_CHAR('\012');
    } else if (*buf == '\012') {
        ++buf;
    } else {
        *ret = -1;
        return NULL;
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
/*
 * Parses an HTTP request held in buf_start[0 .. len).
 *
 * On entry *num_headers is the capacity of `headers`; on return it is the
 * number of entries filled in. All other output parameters are reset before
 * parsing starts. `last_len` is the length that was passed on a previous,
 * incomplete attempt (0 for the first call).
 *
 * Returns the number of bytes consumed on success, -1 on a parse error, or
 * -2 when the request is incomplete.
 */
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len,
                      int *major_version, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *parsed_end;
    int r;

    *method = NULL;
    *method_len = 0;
    *path = NULL;
    *path_len = 0;
    *major_version = -1;
    *minor_version = -1;
    *num_headers = 0;

    /* if last_len != 0, check if the request is complete (a fast
     * countermeasure against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
            return r;
        }
    }

    parsed_end = parse_request(buf_start, buf_end, method, method_len, path, path_len, major_version, minor_version, headers,
                               num_headers, max_headers, &r);
    if (parsed_end == NULL) {
        return r;
    }

    return (int)(parsed_end - buf_start);
}
/*
 * Parses a status line ("HTTP/x.y SP 3-digit-status [SP reason]" followed by
 * CRLF or LF) and then the header block.
 *
 * *msg points into the input buffer at the reason phrase with its leading
 * spaces removed; it is not NUL-terminated and *msg_len may be 0.
 *
 * Returns the position just past the header block. Returns NULL with
 * *ret = -2 when the input ends early, or NULL with *ret = -1 on malformed
 * input.
 */
static const char *parse_response(const char *buf, const char *buf_end, int *major_version, int *minor_version, int *status,
                                  const char **msg, size_t *msg_len, struct phr_header *headers, size_t *num_headers,
                                  size_t max_headers, int *ret)
{
    /* parse "HTTP/1.x" */
    if ((buf = parse_http_version(buf, buf_end, major_version, minor_version, ret)) == NULL) {
        return NULL;
    }
    /* skip space; parse_http_version() returns a pointer inside the buffer,
     * so the first dereference is safe */
    if (*buf != ' ') {
        *ret = -1;
        return NULL;
    }
    /* check bounds before every further dereference so that input ending
     * right after a space is reported as incomplete instead of being read
     * past its end */
    do {
        ++buf;
        CHECK_EOF();
    } while (*buf == ' ');
    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
    if (buf_end - buf < 4) {
        *ret = -2;
        return NULL;
    }
    PARSE_INT_3(status);

    /* get message including preceding space */
    if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
        return NULL;
    }
    if (*msg_len == 0) {
        /* ok */
    } else if (**msg == ' ') {
        /* Remove preceding space. get_token_to_eol() succeeded, so a line
         * terminator follows the token and stops this loop inside the buffer. */
        do {
            ++*msg;
            --*msg_len;
        } while (**msg == ' ');
    } else {
        /* garbage found after status code */
        *ret = -1;
        return NULL;
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
/*
 * Parses an HTTP response held in buf_start[0 .. len).
 *
 * On entry *num_headers is the capacity of `headers`; on return it is the
 * number of entries filled in. All other output parameters are reset before
 * parsing starts. `last_len` is the length that was passed on a previous,
 * incomplete attempt (0 for the first call).
 *
 * Returns the number of bytes consumed on success, -1 on a parse error, or
 * -2 when the response is incomplete.
 */
int phr_parse_response(const char *buf_start, size_t len, int *major_version, int *minor_version, int *status, const char **msg,
                       size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *parsed_end;
    int r;

    *major_version = -1;
    *minor_version = -1;
    *status = 0;
    *msg = NULL;
    *msg_len = 0;
    *num_headers = 0;

    /* if last_len != 0, check if the response is complete (a fast
     * countermeasure against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
            return r;
        }
    }

    parsed_end = parse_response(buf_start, buf_end, major_version, minor_version, status, msg, msg_len, headers, num_headers,
                                max_headers, &r);
    if (parsed_end == NULL) {
        return r;
    }

    return (int)(parsed_end - buf_start);
}
/*
 * Parses a bare header block (no request or status line) held in
 * buf_start[0 .. len).
 *
 * On entry *num_headers is the capacity of `headers`; on return it is the
 * number of entries filled in. `last_len` is the length that was passed on a
 * previous, incomplete attempt (0 for the first call).
 *
 * Returns the number of bytes consumed on success, -1 on a parse error, or
 * -2 when the header block is incomplete.
 */
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *parsed_end;
    int r;

    *num_headers = 0;

    /* if last_len != 0, check if the header block is complete (a fast
     * countermeasure against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
            return r;
        }
    }

    parsed_end = parse_headers(buf_start, buf_end, headers, num_headers, max_headers, &r);
    if (parsed_end == NULL) {
        return r;
    }

    return (int)(parsed_end - buf_start);
}
/* States of the chunked-transfer decoder, kept in the decoder's _state field. */
enum {
    CHUNKED_IN_CHUNK_SIZE = 0,          /* reading the hex digits of a chunk size */
    CHUNKED_IN_CHUNK_EXT = 1,           /* skipping the rest of the chunk-size line */
    CHUNKED_IN_CHUNK_DATA = 2,          /* copying chunk payload */
    CHUNKED_IN_CHUNK_CRLF = 3,          /* expecting the line break after a chunk's payload */
    CHUNKED_IN_TRAILERS_LINE_HEAD = 4,  /* at the start of a trailer line */
    CHUNKED_IN_TRAILERS_LINE_MIDDLE = 5 /* inside a trailer line */
};
/*
 * Converts one hexadecimal digit (either case) to its value 0..15.
 * Returns -1 when `ch` is not a hexadecimal digit.
 */
static int decode_hex(int ch)
{
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }
    if (ch >= 'A' && ch <= 'F') {
        return 0xa + (ch - 'A');
    }
    if (ch >= 'a' && ch <= 'f') {
        return 0xa + (ch - 'a');
    }
    return -1;
}
520 ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz) {
521 size_t dst = 0, src = 0, bufsz = *_bufsz;
522 ssize_t ret = -2; /* incomplete */
524 while (1) {
525 switch (decoder->_state) {
526 case CHUNKED_IN_CHUNK_SIZE:
527 for (;; ++src) {
528 int v;
529 if (src == bufsz)
530 goto Exit;
531 if ((v = decode_hex(buf[src])) == -1) {
532 if (decoder->_hex_count == 0) {
533 ret = -1;
534 goto Exit;
536 break;
538 if (decoder->_hex_count == sizeof(size_t) * 2) {
539 ret = -1;
540 goto Exit;
542 decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
543 ++decoder->_hex_count;
545 decoder->_hex_count = 0;
546 decoder->_state = CHUNKED_IN_CHUNK_EXT;
547 /* fallthru */
548 case CHUNKED_IN_CHUNK_EXT:
549 /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
550 for (;; ++src) {
551 if (src == bufsz)
552 goto Exit;
553 if (buf[src] == '\012')
554 break;
556 ++src;
557 if (decoder->bytes_left_in_chunk == 0) {
558 if (decoder->consume_trailer) {
559 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
560 break;
561 } else {
562 goto Complete;
565 decoder->_state = CHUNKED_IN_CHUNK_DATA;
566 /* fallthru */
567 case CHUNKED_IN_CHUNK_DATA: {
568 size_t avail = bufsz - src;
569 if (avail < decoder->bytes_left_in_chunk) {
570 if (dst != src)
571 memmove(buf + dst, buf + src, avail);
572 src += avail;
573 dst += avail;
574 decoder->bytes_left_in_chunk -= avail;
575 goto Exit;
577 if (dst != src)
578 memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
579 src += decoder->bytes_left_in_chunk;
580 dst += decoder->bytes_left_in_chunk;
581 decoder->bytes_left_in_chunk = 0;
582 decoder->_state = CHUNKED_IN_CHUNK_CRLF;
584 /* fallthru */
585 case CHUNKED_IN_CHUNK_CRLF:
586 for (;; ++src) {
587 if (src == bufsz)
588 goto Exit;
589 if (buf[src] != '\015')
590 break;
592 if (buf[src] != '\012') {
593 ret = -1;
594 goto Exit;
596 ++src;
597 decoder->_state = CHUNKED_IN_CHUNK_SIZE;
598 break;
599 case CHUNKED_IN_TRAILERS_LINE_HEAD:
600 for (;; ++src) {
601 if (src == bufsz)
602 goto Exit;
603 if (buf[src] != '\015')
604 break;
606 if (buf[src++] == '\012')
607 goto Complete;
608 decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
609 /* fallthru */
610 case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
611 for (;; ++src) {
612 if (src == bufsz)
613 goto Exit;
614 if (buf[src] == '\012')
615 break;
617 ++src;
618 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
619 break;
620 default:
621 assert(!"decoder is corrupt");
625 Complete:
626 ret = bufsz - src;
627 Exit:
628 if (dst != src)
629 memmove(buf + dst, buf + src, bufsz - src);
630 *_bufsz = dst;
631 return ret;
634 int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder) { return decoder->_state == CHUNKED_IN_CHUNK_DATA; }
/* Remove parser-internal helper macros so they do not leak past this file. */
#undef ADVANCE_TOKEN
#undef EXPECT_CHAR
#undef CHECK_EOF