Gratuitous prototype refinements.
[polipo.git] / http_parse.c
bloba7cc7c98dce0199ea0b70ef17029662b1af06067
1 /*
2 Copyright (c) 2003-2006 by Juliusz Chroboczek
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
23 #include "polipo.h"
/* Forward declarations for tokenising helpers that are defined later in
   this file. */
25 static int getNextWord(const char *buf, int i, int *x_return, int *y_return);
26 static int getNextToken(const char *buf, int i, int *x_return, int *y_return);
27 static int getNextTokenInList(const char *buf, int i,
28 int *x_return, int *y_return,
29 int *z_return, int *t_return,
30 int *end_return);
/* Atoms for the header names this parser compares against; every one of
   them is assigned in initHttpParser(). */
32 static AtomPtr atomConnection, atomProxyConnection, atomContentLength,
33 atomHost, atomAcceptRange, atomTE,
34 atomReferer, atomProxyAuthenticate, atomProxyAuthorization,
35 atomKeepAlive, atomTrailers, atomUpgrade, atomDate, atomExpires,
36 atomIfModifiedSince, atomIfUnmodifiedSince, atomIfRange, atomLastModified,
37 atomIfMatch, atomIfNoneMatch, atomAge, atomTransferEncoding,
38 atomETag, atomCacheControl, atomPragma, atomContentRange, atomRange,
39 atomVia, atomContentType, atomVary, atomExpect, atomAuthorization,
40 atomSetCookie, atomCookie, atomCookie2,
41 atomXPolipoDate, atomXPolipoAccess, atomXPolipoLocation,
42 atomXPolipoBodyOffset;
/* Referer censoring mode; registered as a CONFIG_TRISTATE variable in
   preinitHttpParser(). */
44 int censorReferer = 0;
/* When non-zero, httpParseHeaders() skips header lines it cannot parse
   instead of failing. */
45 int laxHttpParser = 1;
/* Header names that must not be copied into the forwarded header block;
   allocated in preinitHttpParser(). */
47 static AtomListPtr censoredHeaders;
49 void
50 preinitHttpParser()
52 CONFIG_VARIABLE_SETTABLE(censorReferer, CONFIG_TRISTATE, configIntSetter,
53 "Censor referer headers.");
54 censoredHeaders = makeAtomList(NULL, 0);
55 if(censoredHeaders == NULL) {
56 do_log(L_ERROR, "Couldn't allocate censored atoms.\n");
57 exit(1);
59 CONFIG_VARIABLE(censoredHeaders, CONFIG_ATOM_LIST_LOWER,
60 "Headers to censor.");
61 CONFIG_VARIABLE_SETTABLE(laxHttpParser, CONFIG_BOOLEAN, configIntSetter,
62 "Ignore unknown HTTP headers.");
65 void
66 initHttpParser()
68 #define A(name, value) name = internAtom(value); if(!name) goto fail;
69 /* These must be in lower-case */
70 A(atomConnection, "connection");
71 A(atomProxyConnection, "proxy-connection");
72 A(atomContentLength, "content-length");
73 A(atomHost, "host");
74 A(atomAcceptRange, "accept-range");
75 A(atomTE, "te");
76 A(atomReferer, "referer");
77 A(atomProxyAuthenticate, "proxy-authenticate");
78 A(atomProxyAuthorization, "proxy-authorization");
79 A(atomKeepAlive, "keep-alive");
80 A(atomTrailers, "trailers");
81 A(atomUpgrade, "upgrade");
82 A(atomDate, "date");
83 A(atomExpires, "expires");
84 A(atomIfModifiedSince, "if-modified-since");
85 A(atomIfUnmodifiedSince, "if-unmodified-since");
86 A(atomIfRange, "if-range");
87 A(atomLastModified, "last-modified");
88 A(atomIfMatch, "if-match");
89 A(atomIfNoneMatch, "if-none-match");
90 A(atomAge, "age");
91 A(atomTransferEncoding, "transfer-encoding");
92 A(atomETag, "etag");
93 A(atomCacheControl, "cache-control");
94 A(atomPragma, "pragma");
95 A(atomContentRange, "content-range");
96 A(atomRange, "range");
97 A(atomVia, "via");
98 A(atomContentType, "content-type");
99 A(atomVary, "vary");
100 A(atomExpect, "expect");
101 A(atomAuthorization, "authorization");
102 A(atomSetCookie, "set-cookie");
103 A(atomCookie, "cookie");
104 A(atomCookie2, "cookie2");
105 A(atomXPolipoDate, "x-polipo-date");
106 A(atomXPolipoAccess, "x-polipo-access");
107 A(atomXPolipoLocation, "x-polipo-location");
108 A(atomXPolipoBodyOffset, "x-polipo-body-offset");
109 #undef A
110 return;
112 fail:
113 do_log(L_ERROR, "Couldn't allocate atom.\n");
114 exit(1);
/* Finds the next word in buf at or after offset i.  Leading spaces (only
   ' ', not tabs) are skipped; the word then extends over characters with
   codes 33..126.  On success the half-open range [*x_return, *y_return)
   is stored and 0 is returned.  If the first non-space character is CR or
   LF, -1 is returned and the outputs are left untouched. */
static int
getNextWord(const char *restrict buf, int i, int *x_return, int *y_return)
{
    int begin;
    int pos = i;

    for(; buf[pos] == ' '; pos++)
        ;
    if(buf[pos] == '\r' || buf[pos] == '\n')
        return -1;

    begin = pos;
    for(; buf[pos] >= 33 && buf[pos] <= 126; pos++)
        ;

    *x_return = begin;
    *y_return = pos;
    return 0;
}
/* Skips a parenthesised comment.  buf[i] must be '(' (asserted).  Returns
   the offset just past the first unescaped ')', or -1 if a line break is
   met that is not followed by a space or tab (i.e. the comment is not
   continued on a folded line), or if a CR is not followed by LF.
   Only the sequence backslash-')' is treated as an escape. */
static int
skipComment(const char *restrict buf, int i)
{
    int pos;

    assert(buf[i] == '(');

    pos = i + 1;
    for(;;) {
        switch(buf[pos]) {
        case ')':
            return pos + 1;
        case '\\':
            /* An escaped ')' does not close the comment. */
            pos += (buf[pos + 1] == ')') ? 2 : 1;
            break;
        case '\n':
            if(buf[pos + 1] != ' ' && buf[pos + 1] != '\t')
                return -1;
            pos += 2;
            break;
        case '\r':
            if(buf[pos + 1] != '\n')
                return -1;
            if(buf[pos + 2] != ' ' && buf[pos + 2] != '\t')
                return -1;
            pos += 3;
            break;
        default:
            pos++;
            break;
        }
    }
}
/* Advances past spaces, tabs, parenthesised comments and folded line
   breaks (LF or CRLF followed by a space or tab) starting at offset i.
   Returns the offset of the first character that is none of these -- an
   unfolded line break counts as such a character -- or -1 if
   skipComment() rejects a comment. */
static int
skipWhitespace(const char *restrict buf, int i)
{
    for(;;) {
        switch(buf[i]) {
        case ' ':
        case '\t':
            i++;
            continue;
        case '(':
            i = skipComment(buf, i);
            if(i < 0)
                return -1;
            continue;
        case '\n':
            if(buf[i + 1] == ' ' || buf[i + 1] == '\t') {
                i += 2;
                continue;
            }
            return i;
        case '\r':
            if(buf[i + 1] == '\n' &&
               (buf[i + 2] == ' ' || buf[i + 2] == '\t')) {
                i += 3;
                continue;
            }
            return i;
        default:
            return i;
        }
    }
}
/* Finds the next HTTP token in buf at or after offset i.
   Spaces, tabs, parenthesised comments and folded line breaks (LF or CRLF
   followed by a space or tab) in front of the token are skipped.  The
   token extends over characters with codes 33..126 up to the first
   separator character.  On success the half-open range
   [*x_return, *y_return) is stored and the offset just past the token
   (equal to *y_return) is returned; the token may be empty.
   Returns -1 if an unfolded line break is met before a token starts, or
   if a comment contains a line break. */
static int
getNextToken(const char *restrict buf, int i, int *x_return, int *y_return)
{
    int x, y;

 again:
    while(buf[i] == ' ' || buf[i] == '\t')
        i++;

    if(buf[i] == '(') {
        i++;
        while(buf[i] != ')') {
            if(buf[i] == '\n' || buf[i] == '\r')
                return -1;
            /* Advance the index, not the base pointer: the offsets handed
               back to the caller are relative to the original buf. */
            if(buf[i] == '\\' && buf[i + 1] != '\n' && buf[i + 1] != '\r')
                i += 2;
            else
                i++;
        }
        /* Step over the closing ')' so that scanning resumes after the
           comment instead of yielding an empty token at the parenthesis. */
        i++;
        goto again;
    }

    if(buf[i] == '\n') {
        if(buf[i + 1] == ' ' || buf[i + 1] == '\t') {
            i += 2;
            goto again;
        } else {
            return -1;
        }
    }

    if(buf[i] == '\r') {
        if(buf[i + 1] == '\n' && (buf[i + 2] == ' ' || buf[i + 2] == '\t')) {
            i += 3;
            goto again;
        } else {
            return -1;
        }
    }

    x = i;
    while(buf[i] > 32 && buf[i] < 127) {
        switch(buf[i]) {
        case '(': case ')': case '<': case '>': case '@':
        case ',': case ';': case ':': case '\\': case '/':
        case '[': case ']': case '?': case '=':
        case '{': case '}': case ' ': case '\t':
            goto out;
        default:
            i++;
        }
    }
 out:
    y = i;

    *x_return = x;
    *y_return = y;

    return y;
}
/* Parses an entity tag starting at offset i: optional spaces/tabs, an
   optional "W/" weakness prefix, then a double-quoted string.
   On success stores the half-open range of the text between the quotes
   in [*x_return, *y_return), stores 1 in *weak_return if the "W/" prefix
   was present (0 otherwise), and returns the offset just past the closing
   quote.  Returns -1 if no opening quote is found or if the end of the
   line is reached before the closing quote. */
static int
getNextETag(const char * restrict buf, int i,
            int *x_return, int *y_return, int *weak_return)
{
    int weak = 0;
    int x, y;

    while(buf[i] == ' ' || buf[i] == '\t')
        i++;
    if(buf[i] == 'W' && buf[i + 1] == '/') {
        weak = 1;
        i += 2;
    }
    if(buf[i] == '"')
        i++;
    else
        return -1;

    x = i;
    while(buf[i] != '"') {
        /* A line break inside the quotes means the tag is unterminated;
           either CR or LF ends the scan (the original tested both with
           &&, which can never be true). */
        if(buf[i] == '\r' || buf[i] == '\n')
            return -1;
        i++;
    }
    y = i;
    i++;

    *x_return = x;
    *y_return = y;
    *weak_return = weak;
    return i;
}
/* Parses one element of a comma-separated list of the form
   token [ "=" value ], starting at offset i.
   Outputs:
     [*x_return, *y_return)  range of the token (from getNextToken());
     [*z_return, *t_return)  range of the value after '=', running up to
                             the next ',' or line break; both are -1 when
                             there is no '='.  z_return and t_return may
                             be NULL;
     *end_return             1 if the element is followed by a line break
                             that is not folded, 0 if more elements may
                             follow (a ',' or a folded line).
   Returns the offset at which the next element starts, or -1 on a syntax
   error (no token, a CR without LF, or an unexpected character after the
   element). */
static int
getNextTokenInList(const char *restrict buf, int i,
                   int *x_return, int *y_return,
                   int *z_return, int *t_return,
                   int *end_return)
{
    int j, x, y, z = -1, t = -1, end;

    j = getNextToken(buf, i, &x, &y);
    if(j < 0)
        return -1;
    while(buf[j] == ' ' || buf[j] == '\t')
        j++;

    if(buf[j] == '=') {
        j++;
        while(buf[j] == ' ' || buf[j] == '\t')
            j++;
        z = j;
        while(buf[j] != ',' && buf[j] != '\n' && buf[j] != '\r')
            j++;
        /* Record where the value ends; previously t was never assigned,
           so callers always received -1 for the value end. */
        t = j;
    }

    if(buf[j] == '\n' || buf[j] == '\r') {
        if(buf[j] == '\r') {
            if(buf[j + 1] != '\n')
                return -1;
            j += 2;
        } else
            j++;
        end = 1;
        /* A line starting with whitespace continues the list. */
        if(buf[j] == ' ' || buf[j] == '\t') {
            while(buf[j] == ' ' || buf[j] == '\t')
                j++;
            end = 0;
        }
    } else if(buf[j] == ',') {
        j++;
        while(buf[j] == ' ' || buf[j] == '\t')
            j++;
        end = 0;
    } else {
        return -1;
    }

    *x_return = x;
    *y_return = y;
    if(z_return)
        *z_return = z;
    if(t_return)
        *t_return = t;
    *end_return = end;
    return j;
}
/* Returns non-zero when strcasecmp_n() reports that s equals the
   end - start bytes of buf beginning at offset start, and 0 otherwise.
   (Presumably a case-insensitive comparison, going by the helper's name;
   the helper is defined outside this file.) */
static inline int
token_compare(const char *buf, int start, int end, const char *s)
{
    const char *token = buf + start;
    int length = end - start;

    return strcasecmp_n(s, token, length) == 0;
}
/* Expects only spaces followed by a line terminator at offset i.
   Returns the offset just past the LF or CRLF, or -1 if anything other
   than spaces precedes the terminator or if a CR is not followed by LF. */
static int
skipEol(const char *restrict buf, int i)
{
    int pos = i;

    while(buf[pos] == ' ')
        pos++;

    switch(buf[pos]) {
    case '\n':
        return pos + 1;
    case '\r':
        return (buf[pos + 1] == '\n') ? pos + 2 : -1;
    default:
        return -1;
    }
}
/* Scans forward from offset i to the first CR or LF.  On success stores
   the offset of that line terminator in *start_return and returns the
   offset just past it (past the LF, or past the CRLF pair).  Returns -1,
   leaving *start_return untouched, when a CR is not followed by LF. */
static int
skipToEol(const char *restrict buf, int i, int *start_return)
{
    int width;

    for(; buf[i] != '\n' && buf[i] != '\r'; i++)
        ;

    if(buf[i] == '\n')
        width = 1;
    else if(buf[i + 1] == '\n')     /* buf[i] is '\r' here */
        width = 2;
    else
        return -1;

    *start_return = i;
    return i + width;
}
/* Delimits a header value that begins at offset start and may span
   several folded lines.  Leading spaces and tabs are skipped first; the
   value then runs to the line terminator of the last line, where a line
   is treated as continued when the next one begins with a space or tab.
   Stores the value's start in *value_start_return and the offset of the
   final line terminator in *value_end_return, and returns the offset just
   past that terminator.  Returns -1 if skipToEol() fails. */
static int
getHeaderValue(const char *restrict buf, int start,
               int *value_start_return, int *value_end_return)
{
    int scan, next, line_end;

    while(buf[start] == ' ' || buf[start] == '\t')
        start++;

    scan = start;
    for(;;) {
        next = skipToEol(buf, scan, &line_end);
        if(next < 0)
            return -1;
        /* Stop unless the following line is a continuation. */
        if(buf[next] != ' ' && buf[next] != '\t')
            break;
        scan = next + 1;
    }

    *value_start_return = start;
    *value_end_return = line_end;
    return next;
}
392 httpParseClientFirstLine(const char *restrict buf, int offset,
393 int *method_return,
394 AtomPtr *url_return,
395 int *version_return)
397 int i = 0;
398 int x, y;
399 int method;
400 AtomPtr url;
401 int version = HTTP_UNKNOWN;
402 int eol;
404 i = offset;
405 i = getNextWord(buf, i, &x, &y);
406 if(i < 0) return -1;
407 if(y == x + 3 && memcmp(buf + x, "GET", 3) == 0)
408 method = METHOD_GET;
409 else if(y == x + 4 && memcmp(buf + x, "HEAD", 3) == 0)
410 method = METHOD_HEAD;
411 else if(y == x + 4 && memcmp(buf + x, "POST", 3) == 0)
412 method = METHOD_POST;
413 else if(y == x + 3 && memcmp(buf + x, "PUT", 3) == 0)
414 method = METHOD_PUT;
415 else if(y == x + 7 && memcmp(buf + x, "CONNECT", 7) == 0)
416 method = METHOD_CONNECT;
417 else
418 method = METHOD_UNKNOWN;
420 i = getNextWord(buf, y + 1, &x, &y);
421 if(i < 0) return -1;
423 url = internAtomN(buf + x, y - x);
425 i = getNextWord(buf, y + 1, &x, &y);
426 if(i < 0) {
427 releaseAtom(url);
428 return -1;
431 if(y == x + 8) {
432 if(memcmp(buf + x, "HTTP/1.", 7) != 0)
433 version = HTTP_UNKNOWN;
434 else if(buf[x + 7] == '0')
435 version = HTTP_10;
436 else if(buf[x + 7] >= '1' && buf[x + 7] <= '9')
437 version = HTTP_11;
438 else
439 version = HTTP_UNKNOWN;
442 eol = skipEol(buf, y);
443 if(eol < 0) return -1;
445 *method_return = method;
446 if(url_return)
447 *url_return = url;
448 else
449 releaseAtom(url);
450 *version_return = version;
451 return eol;
455 httpParseServerFirstLine(const char *restrict buf,
456 int *status_return,
457 int *version_return,
458 AtomPtr *message_return)
460 int i = 0;
461 int x, y, eol;
462 int status;
463 int version = HTTP_UNKNOWN;
465 i = getNextWord(buf, 0, &x, &y);
466 if(i < 0)
467 return -1;
468 if(y == x + 8 && memcmp(buf + x, "HTTP/1.0", 8) == 0)
469 version = HTTP_10;
470 else if(y >= x + 8 && memcmp(buf + x, "HTTP/1.", 7) == 0)
471 version = HTTP_11;
472 else
473 version = HTTP_UNKNOWN;
475 i = getNextWord(buf, y + 1, &x, &y);
476 if(i < 0) return -1;
477 if(y == x + 3)
478 status = atol(buf + x);
479 else return -1;
481 i = skipToEol(buf, y, &eol);
482 if(i < 0) return -1;
484 *status_return = status;
485 *version_return = version;
486 if(message_return) {
487 /* Netscape enterprise bug */
488 if(eol > y)
489 *message_return = internAtomN(buf + y + 1, eol - y - 1);
490 else
491 *message_return = internAtom("No message");
493 return i;
/* Reads an unsigned decimal number starting at buf[start].  Stores its
   value in *val_return and returns the offset of the first character
   after the digits, or returns -1 (output untouched) if buf[start] is
   not a digit.  No overflow check is made on the accumulated value. */
static int
parseInt(const char *restrict buf, int start, int *val_return)
{
    int pos = start;
    int acc = 0;

    if(!digit(buf[pos]))
        return -1;

    do {
        acc = acc * 10 + (buf[pos] - '0');
        pos++;
    } while(digit(buf[pos]));

    *val_return = acc;
    return pos;
}
/* Parses one header line starting at offset start and returns the offset
   of the following line.
   Returned *name_start_return is -1 at end of headers (an empty line),
   -2 if the line couldn't be parsed; in both cases the other outputs are
   left untouched.  Otherwise the name and value ranges are stored in the
   four output parameters. */
static int
parseHeaderLine(const char *restrict buf, int start,
                int *name_start_return, int *name_end_return,
                int *value_start_return, int *value_end_return)
{
    int pos;
    int n0, n1, v0, v1;

    /* An empty line terminates the header block. */
    if(buf[start] == '\n') {
        *name_start_return = -1;
        return start + 1;
    }
    if(buf[start] == '\r' && buf[start + 1] == '\n') {
        *name_start_return = -1;
        return start + 2;
    }

    pos = getNextToken(buf, start, &n0, &n1);
    if(pos >= 0 && buf[pos] == ':') {
        pos++;
        while(buf[pos] == ' ' || buf[pos] == '\t')
            pos++;

        pos = getHeaderValue(buf, pos, &v0, &v1);
        if(pos >= 0) {
            *name_start_return = n0;
            *name_end_return = n1;
            *value_start_return = v0;
            *value_end_return = v1;
            return pos;
        }
    }

    /* Syntax error: resynchronise just past the end of this line. */
    for(pos = start; ; pos++) {
        if(buf[pos] == '\n') {
            pos += 1;
            break;
        }
        if(buf[pos] == '\r' && buf[pos + 1] == '\n') {
            pos += 2;
            break;
        }
    }
    *name_start_return = -2;
    return pos;
}
/* Searches buf[from..to) for the empty line that ends a header block,
   i.e. two consecutive line terminators (each LF or CRLF).
   On success returns the offset of the first of the two terminators and
   stores the offset just past the second one (where the body starts) in
   *body_return.  Returns -1, leaving *body_return untouched, if no empty
   line is found.
   Offset 0 doubles as the "no terminator seen yet" marker, so a line
   terminator located at offset 0 is not counted. */
int
findEndOfHeaders(const char *restrict buf, int from, int to, int *body_return)
{
    int i = from;
    int eol = 0;

    while(i < to) {
        if(buf[i] == '\n') {
            if(eol) {
                *body_return = i + 1;
                return eol;
            }
            eol = i;
            i++;
        } else if(buf[i] == '\r') {
            if(i < to - 1 && buf[i + 1] == '\n') {
                if(eol) {
                    /* Same contract as the LF case above: the body offset
                       goes to the output parameter and the header end is
                       the return value (the original had them swapped). */
                    *body_return = i + 2;
                    return eol;
                }
                eol = i;
                i += 2;
            } else {
                /* A lone CR is ordinary data. */
                eol = 0;
                i++;
            }
        } else {
            eol = 0;
            i++;
        }
    }
    return -1;
}
/* Parses a Content-Range value of the form
       bytes ( first "-" last | "*" ) "/" ( length | "*" )
   starting at offset i.
   On success stores the first byte offset in *from_return (0 for "*"),
   one past the last byte offset in *to_return (-1 for "*") and the total
   length in *full_len_return (-1 for "*"; note that the '*' itself is
   not stepped over before the end-of-line check), and returns the offset
   just past the parsed length.  Returns -1 on any syntax error, including
   a malformed comment in the whitespace or trailing text before the end
   of the line. */
static int
parseContentRange(const char *restrict buf, int i,
                  int *from_return, int *to_return, int *full_len_return)
{
    int j;
    int from, to, full_len;

    i = skipWhitespace(buf, i);
    if(i < 0) return -1;
    if(!token_compare(buf, i, i + 5, "bytes"))
        return -1;
    i += 5;
    i = skipWhitespace(buf, i);
    /* skipWhitespace() can fail here too; without this check buf would be
       indexed with -1. */
    if(i < 0) return -1;
    if(buf[i] == '*') {
        from = 0;
        to = -1;
        i++;
    } else {
        i = parseInt(buf, i, &from);
        if(i < 0) return -1;
        if(buf[i] != '-') return -1;
        i++;
        i = parseInt(buf, i, &to);
        if(i < 0) return -1;
        /* Convert the inclusive last-byte position to an exclusive end. */
        to = to + 1;
    }
    if(buf[i] != '/')
        return -1;
    i++;
    if(buf[i] == '*')
        full_len = -1;
    else {
        i = parseInt(buf, i, &full_len);
        if(i < 0) return -1;
    }
    j = skipEol(buf, i);
    if(j < 0)
        return -1;

    *from_return = from;
    *to_return = to;
    *full_len_return = full_len;
    return i;
}
/* Parses a Range value of the form "bytes=" [ first ] "-" [ last ]
   starting at offset i.
   On success stores the first byte offset in *from_return (0 when it is
   omitted) and one past the last byte offset in *to_return (-1 when it
   is omitted), and returns the offset just past the parsed range.
   Returns -1 on any syntax error, including a malformed comment in the
   whitespace or trailing text before the end of the line.
   NOTE(review): a suffix range such as "bytes=-500" is stored as from 0,
   to 501 rather than as "the last 500 bytes" -- confirm that callers
   expect this. */
static int
parseRange(const char *restrict buf, int i,
           int *from_return, int *to_return)
{
    int j;
    int from, to;

    i = skipWhitespace(buf, i);
    if(i < 0)
        return -1;
    if(!token_compare(buf, i, i + 6, "bytes="))
        return -1;
    i += 6;
    i = skipWhitespace(buf, i);
    /* skipWhitespace() can fail here too; without this check buf would be
       indexed with -1. */
    if(i < 0)
        return -1;
    if(buf[i] == '-') {
        from = 0;
    } else {
        i = parseInt(buf, i, &from);
        if(i < 0) return -1;
    }
    if(buf[i] != '-')
        return -1;
    i++;
    j = parseInt(buf, i, &to);
    if(j < 0)
        to = -1;
    else {
        /* Convert the inclusive last-byte position to an exclusive end. */
        to = to + 1;
        i = j;
    }
    j = skipEol(buf, i);
    if(j < 0) return -1;
    *from_return = from;
    *to_return = to;
    return i;
}
/* Returns 1 if the two URLs (given with explicit lengths) name the same
   host, 0 otherwise.  Both must be at least 7 bytes long and have "://"
   at offsets 4..6, i.e. a four-character scheme; the scheme text itself
   is not compared.  The authority parts, starting at offset 7 and ending
   at the first '/' or at the end of the URL, are compared with bit 0x20
   forced on, which makes ASCII letters compare case-insensitively. */
static int
urlSameHost(const char *url1, int len1, const char *url2, int len2)
{
    int pos;
    int done1, done2;

    if(len1 < 7 || len2 < 7)
        return 0;
    if(memcmp(url1 + 4, "://", 3) != 0)
        return 0;
    if(memcmp(url2 + 4, "://", 3) != 0)
        return 0;

    for(pos = 7; pos < len1 && pos < len2; pos++) {
        if(url1[pos] == '/' || url2[pos] == '/')
            break;
        if((url1[pos] | 0x20) != (url2[pos] | 0x20))
            break;
    }

    /* Same host only if both authorities ended at the same position. */
    done1 = (pos == len1 || url1[pos] == '/');
    done2 = (pos == len2 || url2[pos] == '/');
    return done1 && done2;
}
/* Doubles the capacity of a header buffer.  hbuf is either the caller's
   fixed buffer hbuf_small or a heap block obtained from an earlier call;
   *size is its current capacity.  On success returns a heap buffer of
   twice the size holding the old contents and updates *size.  On failure
   (doubling does not increase the size, or allocation fails) frees hbuf
   if it is a heap block, sets *size to 0 and returns NULL. */
static char *
resize_hbuf(char *hbuf, int *size, char *hbuf_small)
{
    int old_size = *size;
    int doubled = 2 * old_size;
    int on_heap = (hbuf != hbuf_small);
    char *grown = NULL;

    if(doubled > old_size) {
        if(on_heap) {
            grown = realloc(hbuf, doubled);
        } else {
            /* First growth: move off the caller's fixed buffer. */
            grown = malloc(doubled);
            if(grown != NULL)
                memcpy(grown, hbuf, old_size);
        }
    }

    if(grown != NULL) {
        *size = doubled;
        return grown;
    }

    if(on_heap)
        free(hbuf);
    *size = 0;
    return NULL;
}
727 httpParseHeaders(int client, AtomPtr url,
728 const char *buf, int start, HTTPRequestPtr request,
729 AtomPtr *headers_return,
730 int *len_return, CacheControlPtr cache_control_return,
731 HTTPConditionPtr *condition_return, int *te_return,
732 time_t *date_return, time_t *last_modified_return,
733 time_t *expires_return, time_t *polipo_age_return,
734 time_t *polipo_access_return, int *polipo_body_offset_return,
735 int *age_return, char **etag_return, AtomPtr *expect_return,
736 HTTPRangePtr range_return, HTTPRangePtr content_range_return,
737 char **location_return, AtomPtr *via_return,
738 AtomPtr *auth_return)
740 int local = url ? urlIsLocal(url->string, url->length) : 0;
741 char hbuf_small[512];
742 char *hbuf = hbuf_small;
743 int hbuf_size = 512, hbuf_length = 0;
744 int i, j,
745 name_start, name_end, value_start, value_end,
746 token_start, token_end, end;
747 AtomPtr name = NULL;
748 time_t date = -1, last_modified = -1, expires = -1, polipo_age = -1,
749 polipo_access = -1, polipo_body_offset = -1;
750 int len = -1;
751 CacheControlRec cache_control;
752 char *endptr;
753 int te = TE_IDENTITY;
754 int age = -1;
755 char *etag = NULL, *ifrange = NULL;
756 int persistent = (!request || (request->connection->version != HTTP_10));
757 char *location = NULL;
758 AtomPtr via = NULL;
759 AtomPtr auth = NULL;
760 AtomPtr expect = NULL;
761 HTTPConditionPtr condition;
762 time_t ims = -1, inms = -1;
763 char *im = NULL, *inm = NULL;
764 AtomListPtr hopToHop = NULL;
765 HTTPRangeRec range = {-1, -1, -1}, content_range = {-1, -1, -1};
766 int haveCacheControl = 0;
768 #define RESIZE_HBUF() \
769 do { \
770 hbuf = resize_hbuf(hbuf, &hbuf_size, hbuf_small); \
771 if(hbuf == NULL) \
772 goto fail; \
773 } while(0)
775 cache_control.flags = 0;
776 cache_control.max_age = -1;
777 cache_control.s_maxage = -1;
778 cache_control.min_fresh = 0;
779 cache_control.max_stale = 0;
781 i = start;
783 while(1) {
784 i = parseHeaderLine(buf, i,
785 &name_start, &name_end, &value_start, &value_end);
786 if(i < 0) {
787 do_log(L_ERROR, "Couldn't find end of header line.\n");
788 goto fail;
791 if(name_start == -1)
792 break;
794 if(name_start < 0)
795 continue;
797 name = internAtomLowerN(buf + name_start, name_end - name_start);
799 if(name == atomConnection) {
800 j = getNextTokenInList(buf, value_start,
801 &token_start, &token_end, NULL, NULL,
802 &end);
803 while(1) {
804 if(j < 0) {
805 do_log(L_ERROR, "Couldn't parse Connection: ");
806 do_log_n(L_ERROR, buf + value_start,
807 value_end - value_start);
808 do_log(L_ERROR, ".\n");
809 goto fail;
811 if(token_compare(buf, token_start, token_end, "close")) {
812 persistent = 0;
813 } else if(token_compare(buf, token_start, token_end,
814 "keep-alive")) {
815 persistent = 1;
816 } else {
817 if(hopToHop == NULL)
818 hopToHop = makeAtomList(NULL, 0);
819 if(hopToHop == NULL) {
820 do_log(L_ERROR, "Couldn't allocate atom list.\n");
821 goto fail;
823 atomListCons(internAtomLowerN(buf + token_start,
824 token_end - token_start),
825 hopToHop);
827 if(end)
828 break;
829 j = getNextTokenInList(buf, j,
830 &token_start, &token_end, NULL, NULL,
831 &end);
833 } else if(name == atomCacheControl)
834 haveCacheControl = 1;
836 releaseAtom(name);
837 name = NULL;
840 i = start;
842 while(1) {
843 i = parseHeaderLine(buf, i,
844 &name_start, &name_end, &value_start, &value_end);
845 if(i < 0) {
846 do_log(L_ERROR, "Couldn't find end of header line.\n");
847 goto fail;
850 if(name_start == -1)
851 break;
853 if(name_start < 0) {
854 do_log(L_WARN, "Couldn't parse header line.\n");
855 if(laxHttpParser)
856 continue;
857 else
858 goto fail;
861 name = internAtomLowerN(buf + name_start, name_end - name_start);
863 if(name == atomProxyConnection) {
864 j = getNextTokenInList(buf, value_start,
865 &token_start, &token_end, NULL, NULL,
866 &end);
867 while(1) {
868 if(j < 0) {
869 do_log(L_WARN, "Couldn't parse Proxy-Connection:");
870 do_log_n(L_WARN, buf + value_start,
871 value_end - value_start);
872 do_log(L_WARN, ".\n");
873 persistent = 0;
874 break;
876 if(token_compare(buf, token_start, token_end, "close")) {
877 persistent = 0;
878 } else if(token_compare(buf, token_start, token_end,
879 "keep-alive")) {
880 persistent = 1;
882 if(end)
883 break;
884 j = getNextTokenInList(buf, j,
885 &token_start, &token_end, NULL, NULL,
886 &end);
888 } else if(name == atomContentLength) {
889 j = skipWhitespace(buf, value_start);
890 if(j < 0) {
891 do_log(L_WARN, "Couldn't parse Content-Length: \n");
892 do_log_n(L_WARN, buf + value_start, value_end - value_start);
893 do_log(L_WARN, ".\n");
894 len = -1;
895 } else {
896 len = strtol(buf + value_start, &endptr, 10);
897 if(endptr <= buf + value_start) {
898 do_log(L_WARN, "Couldn't parse Content-Length: \n");
899 do_log_n(L_WARN, buf + value_start,
900 value_end - value_start);
901 do_log(L_WARN, ".\n");
902 len = -1;
905 } else if((!local && name == atomProxyAuthorization) ||
906 (local && name == atomAuthorization)) {
907 if(auth_return) {
908 auth = internAtomN(buf + value_start, value_end - value_start);
909 if(auth == NULL) {
910 do_log(L_ERROR, "Couldn't allocate authorization.\n");
911 goto fail;
914 } else if(name == atomReferer) {
915 int h;
916 if(censorReferer == 0 ||
917 (censorReferer == 1 && url != NULL &&
918 urlSameHost(url->string, url->length,
919 buf + value_start, value_end - value_start))) {
920 while(hbuf_length > hbuf_size - 2)
921 RESIZE_HBUF();
922 hbuf[hbuf_length++] = '\r';
923 hbuf[hbuf_length++] = '\n';
924 do {
925 h = snnprint_n(hbuf, hbuf_length, hbuf_size,
926 buf + name_start, value_end - name_start);
927 if(h < 0) RESIZE_HBUF();
928 } while(h < 0);
929 hbuf_length = h;
931 } else if(name == atomTrailers || name == atomUpgrade) {
932 do_log(L_ERROR, "Trailers or upgrade present.\n");
933 goto fail;
934 } else if(name == atomDate || name == atomExpires ||
935 name == atomIfModifiedSince ||
936 name == atomIfUnmodifiedSince ||
937 name == atomLastModified ||
938 name == atomXPolipoDate || name == atomXPolipoAccess) {
939 time_t t;
940 j = parse_time(buf, value_start, value_end, &t);
941 if(j < 0) {
942 if(name != atomExpires) {
943 do_log(L_WARN, "Couldn't parse %s: ", name->string);
944 do_log_n(L_WARN, buf + value_start,
945 value_end - value_start);
946 do_log(L_WARN, "\n");
948 t = -1;
950 if(name == atomDate) {
951 if(t >= 0)
952 date = t;
953 } else if(name == atomExpires) {
954 if(t >= 0)
955 expires = t;
956 else
957 expires = 0;
958 } else if(name == atomLastModified)
959 last_modified = t;
960 else if(name == atomIfModifiedSince)
961 ims = t;
962 else if(name == atomIfUnmodifiedSince)
963 inms = t;
964 else if(name == atomXPolipoDate)
965 polipo_age = t;
966 else if(name == atomXPolipoAccess)
967 polipo_access = t;
968 } else if(name == atomAge) {
969 j = skipWhitespace(buf, value_start);
970 if(j < 0) {
971 age = -1;
972 } else {
973 age = strtol(buf + value_start, &endptr, 10);
974 if(endptr <= buf + value_start)
975 age = -1;
977 if(age < 0) {
978 do_log(L_WARN, "Couldn't parse age: \n");
979 do_log_n(L_WARN, buf + value_start, value_end - value_start);
980 do_log(L_WARN, " -- ignored.\n");
982 } else if(name == atomXPolipoBodyOffset) {
983 j = skipWhitespace(buf, value_start);
984 if(j < 0) {
985 do_log(L_ERROR, "Couldn't parse body offset.\n");
986 goto fail;
987 } else {
988 polipo_body_offset = strtol(buf + value_start, &endptr, 10);
989 if(endptr <= buf + value_start) {
990 do_log(L_ERROR, "Couldn't parse body offset.\n");
991 goto fail;
994 } else if(name == atomTransferEncoding) {
995 if(token_compare(buf, value_start, value_end, "identity"))
996 te = TE_IDENTITY;
997 else if(token_compare(buf, value_start, value_end, "chunked"))
998 te = TE_CHUNKED;
999 else
1000 te = TE_UNKNOWN;
1001 } else if(name == atomETag ||
1002 name == atomIfNoneMatch || name == atomIfMatch ||
1003 name == atomIfRange) {
1004 int x, y;
1005 int weak;
1006 char *e;
1007 j = getNextETag(buf, value_start, &x, &y, &weak);
1008 if(j < 0) {
1009 if(buf[value_start] != '\r' && buf[value_start] != '\n')
1010 do_log(L_ERROR, "Couldn't parse ETag.\n");
1011 } else if(weak) {
1012 do_log(L_WARN, "Server returned weak ETag -- ignored.\n");
1013 } else {
1014 e = strdup_n(buf + x, y - x);
1015 if(e == NULL) goto fail;
1016 if(name == atomETag) {
1017 if(!etag)
1018 etag = e;
1019 else
1020 free(e);
1021 } else if(name == atomIfNoneMatch) {
1022 if(!inm)
1023 inm = e;
1024 else
1025 free(e);
1026 } else if(name == atomIfMatch) {
1027 if(!im)
1028 im = e;
1029 else
1030 free(e);
1031 } else if(name == atomIfRange) {
1032 if(!ifrange)
1033 ifrange = e;
1034 else
1035 free(e);
1036 } else {
1037 abort();
1040 } else if(name == atomCacheControl) {
1041 int v_start, v_end;
1042 j = getNextTokenInList(buf, value_start,
1043 &token_start, &token_end,
1044 &v_start, &v_end,
1045 &end);
1046 while(1) {
1047 if(j < 0) {
1048 do_log(L_WARN, "Couldn't parse Cache-Control.\n");
1049 cache_control.flags |= CACHE_NO;
1050 break;
1052 if(token_compare(buf, token_start, token_end, "no-cache")) {
1053 cache_control.flags |= CACHE_NO;
1054 } else if(token_compare(buf, token_start, token_end,
1055 "public")) {
1056 cache_control.flags |= CACHE_PUBLIC;
1057 } else if(token_compare(buf, token_start, token_end,
1058 "private")) {
1059 cache_control.flags |= CACHE_PRIVATE;
1060 } else if(token_compare(buf, token_start, token_end,
1061 "no-store")) {
1062 cache_control.flags |= CACHE_NO_STORE;
1063 } else if(token_compare(buf, token_start, token_end,
1064 "no-transform")) {
1065 cache_control.flags |= CACHE_NO_TRANSFORM;
1066 } else if(token_compare(buf, token_start, token_end,
1067 "must-revalidate") ||
1068 token_compare(buf, token_start, token_end,
1069 "must-validate")) { /* losers */
1070 cache_control.flags |= CACHE_MUST_REVALIDATE;
1071 } else if(token_compare(buf, token_start, token_end,
1072 "proxy-revalidate")) {
1073 cache_control.flags |= CACHE_PROXY_REVALIDATE;
1074 } else if(token_compare(buf, token_start, token_end,
1075 "only-if-cached")) {
1076 cache_control.flags |= CACHE_ONLY_IF_CACHED;
1077 } else if(token_compare(buf, token_start, token_end,
1078 "max-age") ||
1079 token_compare(buf, token_start, token_end,
1080 "maxage")) { /* losers */
1081 int a;
1082 if(v_start <= 0 || !digit(buf[v_start])) {
1083 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1084 do_log_n(L_WARN, buf + token_start,
1085 (v_end >= 0 ? v_end : token_end) -
1086 token_start);
1087 do_log(L_WARN, "\n");
1089 a = atoi(buf + v_start);
1090 cache_control.max_age = a;
1091 } else if(token_compare(buf, token_start, token_end,
1092 "s-maxage")) {
1093 int a;
1094 if(v_start <= 0 || !digit(buf[v_start])) {
1095 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1096 do_log_n(L_WARN, buf + token_start,
1097 (v_end >= 0 ? v_end : token_end) -
1098 token_start);
1099 do_log(L_WARN, "\n");
1101 a = atoi(buf + v_start);
1102 cache_control.max_age = a;
1103 } else if(token_compare(buf, token_start, token_end,
1104 "min-fresh")) {
1105 int a;
1106 if(v_start <= 0 || !digit(buf[v_start])) {
1107 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1108 do_log_n(L_WARN, buf + token_start,
1109 (v_end >= 0 ? v_end : token_end) -
1110 token_start);
1111 do_log(L_WARN, "\n");
1113 a = atoi(buf + v_start);
1114 cache_control.max_age = a;
1115 } else if(token_compare(buf, token_start, token_end,
1116 "max-stale")) {
1117 int a;
1118 if(v_start <= 0 || !digit(buf[v_start])) {
1119 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1120 do_log_n(L_WARN, buf + token_start,
1121 (v_end >= 0 ? v_end : token_end) -
1122 token_start);
1123 do_log(L_WARN, "\n");
1125 a = atoi(buf + v_start);
1126 cache_control.max_stale = a;
1127 } else {
1128 do_log(L_WARN, "Unsupported Cache-Control directive ");
1129 do_log_n(L_WARN, buf + token_start,
1130 (v_end >= 0 ? v_end : token_end) - token_start);
1131 do_log(L_WARN, " -- ignored.\n");
1133 if(end)
1134 break;
1135 j = getNextTokenInList(buf, j,
1136 &token_start, &token_end,
1137 &v_start, &v_end,
1138 &end);
1140 } else if(name == atomContentRange) {
1141 if(!client) {
1142 j = parseContentRange(buf, value_start,
1143 &content_range.from, &content_range.to,
1144 &content_range.full_length);
1145 if(j < 0) {
1146 do_log(L_ERROR, "Couldn't parse Content-Range: ");
1147 do_log_n(L_ERROR, buf + value_start,
1148 value_end - value_start);
1149 do_log(L_ERROR, "\n");
1150 goto fail;
1152 } else {
1153 do_log(L_ERROR, "Content-Range from client.\n");
1154 goto fail;
1156 } else if(name == atomRange) {
1157 if(client) {
1158 j = parseRange(buf, value_start, &range.from, &range.to);
1159 if(j < 0) {
1160 do_log(L_WARN, "Couldn't parse Range -- ignored.\n");
1161 range.from = -1;
1162 range.to = -1;
1164 } else {
1165 do_log(L_WARN, "Range from server -- ignored\n");
1167 } else if(name == atomXPolipoLocation) {
1168 if(location_return) {
1169 location =
1170 strdup_n(buf + value_start, value_end - value_start);
1171 if(location == NULL) {
1172 do_log(L_ERROR, "Couldn't allocate location.\n");
1173 goto fail;
1176 } else if(name == atomVia) {
1177 if(via_return) {
1178 via = internAtomN(buf + value_start, value_end - value_start);
1179 if(via == NULL) {
1180 do_log(L_ERROR, "Couldn't allocate via.\n");
1181 goto fail;
1184 } else if(name == atomExpect) {
1185 if(expect_return) {
1186 expect = internAtomLowerN(buf + value_start,
1187 value_end - value_start);
1188 if(expect == NULL) {
1189 do_log(L_ERROR, "Couldn't allocate expect.\n");
1190 goto fail;
1193 } else {
1194 if(!client && name == atomContentType) {
1195 if(token_compare(buf, value_start, value_end,
1196 "multipart/byteranges")) {
1197 do_log(L_ERROR,
1198 "Server returned multipart/byteranges -- yuck!\n");
1199 goto fail;
1202 if(name == atomVary) {
1203 if(!token_compare(buf, value_start, value_end, "host") &&
1204 !token_compare(buf, value_start, value_end, "*")) {
1205 /* What other vary headers should be ignored? */
1206 do_log(L_VARY, "Vary header present (");
1207 do_log_n(L_VARY,
1208 buf + value_start, value_end - value_start);
1209 do_log(L_VARY, ").\n");
1210 cache_control.flags |= CACHE_VARY;
1212 } else if(name == atomAuthorization) {
1213 cache_control.flags |= CACHE_AUTHORIZATION;
1216 if(name == atomPragma) {
1217 /* Pragma is only defined for the client, and the only
1218 standard value is no-cache (RFC 1945, 10.12).
1219 However, we honour a Pragma: no-cache for both the client
1220 and the server when there's no Cache-Control header. In
1221 all cases, we pass the Pragma header to the next hop. */
1222 if(!haveCacheControl) {
1223 j = getNextTokenInList(buf, value_start,
1224 &token_start, &token_end, NULL, NULL,
1225 &end);
1226 while(1) {
1227 if(j < 0) {
1228 do_log(L_WARN, "Couldn't parse Pragma.\n");
1229 cache_control.flags |= CACHE_NO;
1230 break;
1232 if(token_compare(buf, token_start, token_end,
1233 "no-cache"))
1234 cache_control.flags = CACHE_NO;
1235 if(end)
1236 break;
1237 j = getNextTokenInList(buf, j, &token_start, &token_end,
1238 NULL, NULL, &end);
1242 if(!client &&
1243 (name == atomSetCookie ||
1244 name == atomCookie || name == atomCookie2))
1245 cache_control.flags |= CACHE_COOKIE;
1247 if(hbuf) {
1248 if(name != atomConnection && name != atomHost &&
1249 name != atomAcceptRange && name != atomTE &&
1250 name != atomProxyAuthenticate &&
1251 name != atomKeepAlive &&
1252 (!hopToHop || !atomListMember(name, hopToHop)) &&
1253 !atomListMember(name, censoredHeaders)) {
1254 int h;
1255 while(hbuf_length > hbuf_size - 2)
1256 RESIZE_HBUF();
1257 hbuf[hbuf_length++] = '\r';
1258 hbuf[hbuf_length++] = '\n';
1259 do {
1260 h = snnprint_n(hbuf, hbuf_length, hbuf_size,
1261 buf + name_start,
1262 value_end - name_start);
1263 if(h < 0) RESIZE_HBUF();
1264 } while(h < 0);
1265 hbuf_length = h;
1269 releaseAtom(name);
1270 name = NULL;
1273 if(headers_return) {
1274 AtomPtr pheaders = NULL;
1275 pheaders = internAtomN(hbuf, hbuf_length);
1276 if(!pheaders)
1277 goto fail;
1278 *headers_return = pheaders;
1280 if(hbuf != hbuf_small)
1281 free(hbuf);
1282 hbuf = NULL;
1283 hbuf_size = 0;
1285 if(request)
1286 if(!persistent)
1287 request->flags &= ~REQUEST_PERSISTENT;
1289 if(te != TE_IDENTITY) len = -1;
1290 if(len_return) *len_return = len;
1291 if(cache_control_return) *cache_control_return = cache_control;
1292 if(condition_return) {
1293 if(ims >= 0 || inms >= 0 || im || inm || ifrange) {
1294 condition = httpMakeCondition();
1295 if(condition) {
1296 condition->ims = ims;
1297 condition->inms = inms;
1298 condition->im = im;
1299 condition->inm = inm;
1300 condition->ifrange = ifrange;
1301 } else {
1302 do_log(L_ERROR, "Couldn't allocate condition.\n");
1303 if(im) free(im);
1304 if(inm) free(inm);
1306 } else {
1307 condition = NULL;
1309 *condition_return = condition;
1310 } else {
1311 assert(!im && !inm);
1314 if(te_return) *te_return = te;
1315 if(date_return) *date_return = date;
1316 if(last_modified_return) *last_modified_return = last_modified;
1317 if(expires_return) *expires_return = expires;
1318 if(polipo_age_return) *polipo_age_return = polipo_age;
1319 if(polipo_access_return) *polipo_access_return = polipo_access;
1320 if(polipo_body_offset_return)
1321 *polipo_body_offset_return = polipo_body_offset;
1322 if(age_return) *age_return = age;
1323 if(etag_return)
1324 *etag_return = etag;
1325 else {
1326 if(etag) free(etag);
1328 if(range_return) *range_return = range;
1329 if(content_range_return) *content_range_return = content_range;
1330 if(location_return) {
1331 *location_return = location;
1332 } else {
1333 if(location)
1334 free(location);
1336 if(via_return)
1337 *via_return = via;
1338 else {
1339 if(via)
1340 releaseAtom(via);
1342 if(expect_return)
1343 *expect_return = expect;
1344 else {
1345 if(expect)
1346 releaseAtom(expect);
1348 if(auth_return)
1349 *auth_return = auth;
1350 else {
1351 if(auth)
1352 releaseAtom(auth);
1354 if(hopToHop) destroyAtomList(hopToHop);
1355 return i;
1357 fail:
1358 if(hbuf && hbuf != hbuf_small) free(hbuf);
1359 if(name) releaseAtom(name);
1360 if(etag) free(etag);
1361 if(location) free(location);
1362 if(via) releaseAtom(via);
1363 if(expect) releaseAtom(expect);
1364 if(auth) releaseAtom(auth);
1365 if(hopToHop) destroyAtomList(hopToHop);
1367 return -1;
1368 #undef RESIZE_HBUF
/*
 * Split an absolute "http://" URL into host, port and remainder.
 *
 * url, len     the URL text and its length; it is indexed only below len
 *              here (what lwrcmp and atoi_n read is not visible in this
 *              block).
 * x_return     receives the index of the first host character (always 7
 *              for an http URL), or -1 if the URL does not start with
 *              "http://" (compared through lwrcmp).
 * y_return     receives the index just past the host part, or -1 for a
 *              non-http URL.  For a bracketed literal ("[...]", RFC 2732)
 *              the brackets are included in the host span.
 * port_return  receives the port: the number following ':' when atoi_n
 *              accepts it, 80 when there is no port or atoi_n rejects
 *              it, and -1 for a non-http URL.
 * z_return     receives the index at which scanning stopped (0 for a
 *              non-http URL).  After a port this is whatever atoi_n
 *              returned -- presumably the index just past the digits;
 *              confirm against atoi_n.
 *
 * All four output pointers are written unconditionally and must be
 * non-NULL.  The function always returns 0.
 */
int
parseUrl(const char *url, int len,
         int *x_return, int *y_return, int *port_return, int *z_return)
{
    int host_start = -1;
    int host_end = -1;
    int port = -1;
    int pos = 0;

    if(len >= 7 && lwrcmp(url, "http://", 7) == 0) {
        host_start = 7;

        if(host_start < len && url[host_start] == '[') {
            /* RFC 2732 bracketed literal: scan up to and including the
               closing bracket, stopping early on any character that is
               neither ':' nor alphanumeric. */
            pos = host_start + 1;
            while(pos < len) {
                char c = url[pos];
                if(c == ']') {
                    pos++;
                    break;
                }
                if(c != ':' && !letter(c) && !digit(c))
                    break;
                pos++;
            }
        } else {
            /* Plain host name: runs until the port separator or path. */
            pos = host_start;
            while(pos < len && url[pos] != ':' && url[pos] != '/')
                pos++;
        }
        host_end = pos;

        if(pos < len && url[pos] == ':') {
            int after_port = atoi_n(url, pos + 1, len, &port);
            if(after_port >= 0)
                pos = after_port;
            else
                port = 80;      /* unparsable port: fall back, stay on ':' */
        } else {
            port = 80;
        }
    }

    *x_return = host_start;
    *y_return = host_end;
    *port_return = port;
    *z_return = pos;
    return 0;
}
/*
 * Return 1 if the URL is non-empty and begins with '/', 0 otherwise.
 * url is only dereferenced when len is positive.
 */
int
urlIsLocal(const char *url, int len)
{
    if(len <= 0)
        return 0;
    return url[0] == '/';
}
/*
 * Return 1 if the first bytes of the URL are exactly "/polipo/"
 * (case-sensitive byte comparison), 0 otherwise.  URLs shorter than
 * the prefix are never inspected.
 */
int
urlIsSpecial(const char *url, int len)
{
    static const char prefix[] = "/polipo/";
    const int prefix_len = (int)sizeof(prefix) - 1;

    if(len < prefix_len)
        return 0;
    return memcmp(url, prefix, prefix_len) == 0;
}
1434 parseChunkSize(const char *restrict buf, int i, int end,
1435 int *chunk_size_return)
1437 int v, d;
1438 v = h2i(buf[i]);
1439 if(v < 0)
1440 return -1;
1442 i++;
1444 while(i < end) {
1445 d = h2i(buf[i]);
1446 if(d < 0)
1447 break;
1448 v = v * 16 + d;
1449 i++;
1452 while(i < end) {
1453 if(buf[i] == ' ' || buf[i] == '\t')
1454 i++;
1455 else
1456 break;
1459 if(i >= end - 1)
1460 return 0;
1462 if(buf[i] != '\r' || buf[i + 1] != '\n')
1463 return -1;
1465 i += 2;
1467 if(v == 0) {
1468 if(i >= end - 1)
1469 return 0;
1470 if(buf[i] != '\r') {
1471 do_log(L_ERROR, "Trailers present!\n");
1472 return -1;
1474 i++;
1475 if(buf[i] != '\n')
1476 return -1;
1477 i++;
1480 *chunk_size_return = v;
1481 return i;
1486 checkVia(AtomPtr name, AtomPtr via)
1488 int i;
1489 char *v;
1490 if(via == NULL || via->length == 0)
1491 return 1;
1493 v = via->string;
1495 i = 0;
1496 while(i < via->length) {
1497 while(v[i] == ' ' || v[i] == '\t' || v[i] == ',' ||
1498 v[i] == '\r' || v[i] == '\n' ||
1499 digit(v[i]) || v[i] == '.')
1500 i++;
1501 if(i + name->length > via->length)
1502 break;
1503 if(memcmp(v + i, name->string, name->length) == 0) {
1504 char c = v[i + name->length];
1505 if(c == '\0' || c == ' ' || c == '\t' || c == ',' ||
1506 c == '\r' || c == '\n')
1507 return 0;
1509 i++;
1510 while(letter(v[i]) || digit(v[i]) || v[i] == '.')
1511 i++;
1513 return 1;