Update copyright date.
[polipo.git] / http_parse.c
blob7f27e3e08a525b12c115b1b6925fd9c175f78d8e
1 /*
2 Copyright (c) 2003-2006 by Juliusz Chroboczek
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
23 #include "polipo.h"
25 static int getNextWord(const char *buf, int i, int *x_return, int *y_return);
26 static int getNextToken(const char *buf, int i, int *x_return, int *y_return);
27 static int getNextTokenInList(const char *buf, int i,
28 int *x_return, int *y_return,
29 int *z_return, int *t_return,
30 int *end_return);
32 static AtomPtr atomConnection, atomProxyConnection, atomContentLength,
33 atomHost, atomAcceptRange, atomTE,
34 atomReferer, atomProxyAuthenticate, atomProxyAuthorization,
35 atomKeepAlive, atomTrailer, atomUpgrade, atomDate, atomExpires,
36 atomIfModifiedSince, atomIfUnmodifiedSince, atomIfRange, atomLastModified,
37 atomIfMatch, atomIfNoneMatch, atomAge, atomTransferEncoding,
38 atomETag, atomCacheControl, atomPragma, atomContentRange, atomRange,
39 atomVia, atomVary, atomExpect, atomAuthorization,
40 atomSetCookie, atomCookie, atomCookie2,
41 atomXPolipoDate, atomXPolipoAccess, atomXPolipoLocation,
42 atomXPolipoBodyOffset;
44 AtomPtr atomContentType, atomContentEncoding;
46 int censorReferer = 0;
47 int laxHttpParser = 1;
49 static AtomListPtr censoredHeaders;
51 void
52 preinitHttpParser()
54 CONFIG_VARIABLE_SETTABLE(censorReferer, CONFIG_TRISTATE, configIntSetter,
55 "Censor referer headers.");
56 censoredHeaders = makeAtomList(NULL, 0);
57 if(censoredHeaders == NULL) {
58 do_log(L_ERROR, "Couldn't allocate censored atoms.\n");
59 exit(1);
61 CONFIG_VARIABLE(censoredHeaders, CONFIG_ATOM_LIST_LOWER,
62 "Headers to censor.");
63 CONFIG_VARIABLE_SETTABLE(laxHttpParser, CONFIG_BOOLEAN, configIntSetter,
64 "Ignore unknown HTTP headers.");
67 void
68 initHttpParser()
70 #define A(name, value) name = internAtom(value); if(!name) goto fail;
71 /* These must be in lower-case */
72 A(atomConnection, "connection");
73 A(atomProxyConnection, "proxy-connection");
74 A(atomContentLength, "content-length");
75 A(atomHost, "host");
76 A(atomAcceptRange, "accept-range");
77 A(atomTE, "te");
78 A(atomReferer, "referer");
79 A(atomProxyAuthenticate, "proxy-authenticate");
80 A(atomProxyAuthorization, "proxy-authorization");
81 A(atomKeepAlive, "keep-alive");
82 A(atomTrailer, "trailer");
83 A(atomUpgrade, "upgrade");
84 A(atomDate, "date");
85 A(atomExpires, "expires");
86 A(atomIfModifiedSince, "if-modified-since");
87 A(atomIfUnmodifiedSince, "if-unmodified-since");
88 A(atomIfRange, "if-range");
89 A(atomLastModified, "last-modified");
90 A(atomIfMatch, "if-match");
91 A(atomIfNoneMatch, "if-none-match");
92 A(atomAge, "age");
93 A(atomTransferEncoding, "transfer-encoding");
94 A(atomETag, "etag");
95 A(atomCacheControl, "cache-control");
96 A(atomPragma, "pragma");
97 A(atomContentRange, "content-range");
98 A(atomRange, "range");
99 A(atomVia, "via");
100 A(atomContentType, "content-type");
101 A(atomContentEncoding, "content-encoding");
102 A(atomVary, "vary");
103 A(atomExpect, "expect");
104 A(atomAuthorization, "authorization");
105 A(atomSetCookie, "set-cookie");
106 A(atomCookie, "cookie");
107 A(atomCookie2, "cookie2");
108 A(atomXPolipoDate, "x-polipo-date");
109 A(atomXPolipoAccess, "x-polipo-access");
110 A(atomXPolipoLocation, "x-polipo-location");
111 A(atomXPolipoBodyOffset, "x-polipo-body-offset");
112 #undef A
113 return;
115 fail:
116 do_log(L_ERROR, "Couldn't allocate atom.\n");
117 exit(1);
/* Locate the next word in buf at or after offset i.
   Leading spaces are skipped; the word is the following run of
   characters with codes strictly between 32 and 127 (it may be empty).
   On success stores the word's start in *x_return and the offset just
   past it in *y_return, and returns 0.  Returns -1, leaving the outputs
   untouched, when the first non-space character is CR or LF. */
static int
getNextWord(const char *restrict buf, int i, int *x_return, int *y_return)
{
    int pos = i;
    int word_start;

    for(; buf[pos] == ' '; pos++)
        ;

    switch(buf[pos]) {
    case '\n':
    case '\r':
        return -1;
    default:
        break;
    }

    word_start = pos;
    for(; buf[pos] > 32 && buf[pos] < 127; pos++)
        ;

    *x_return = word_start;
    *y_return = pos;
    return 0;
}
/* Skip a parenthesised comment.  buf[i] must be '('.
   Returns the offset just past the first unescaped ')'.  A "\)" pair is
   stepped over; a line break is accepted only when the next line starts
   with a space or a tab.  Returns -1 on any other LF, on a CR that is
   not followed by LF, or on a CRLF not followed by a space or tab. */
static int
skipComment(const char *restrict buf, int i)
{
    assert(buf[i] == '(');

    i++;
    for(;;) {
        switch(buf[i]) {
        case '\\':
            /* Only an escaped closing parenthesis is treated specially. */
            i += (buf[i + 1] == ')') ? 2 : 1;
            break;
        case ')':
            return i + 1;
        case '\n':
            if(buf[i + 1] != ' ' && buf[i + 1] != '\t')
                return -1;
            i += 2;
            break;
        case '\r':
            if(buf[i + 1] != '\n')
                return -1;
            if(buf[i + 2] != ' ' && buf[i + 2] != '\t')
                return -1;
            i += 3;
            break;
        default:
            i++;
            break;
        }
    }
}
/* Skip spaces, tabs, parenthesised comments and line breaks that are
   followed by a space or tab, starting at offset i.
   Returns the offset of the first character that is none of these (a
   line break not followed by a space or tab is returned, not skipped),
   or -1 if skipComment() rejects a comment. */
static int
skipWhitespace(const char *restrict buf, int i)
{
    for(;;) {
        switch(buf[i]) {
        case ' ':
        case '\t':
            i++;
            break;
        case '(':
            i = skipComment(buf, i);
            if(i < 0)
                return -1;
            break;
        case '\n':
            if(buf[i + 1] != ' ' && buf[i + 1] != '\t')
                return i;
            i += 2;
            break;
        case '\r':
            if(buf[i + 1] != '\n')
                return i;
            if(buf[i + 2] != ' ' && buf[i + 2] != '\t')
                return i;
            i += 3;
            break;
        default:
            return i;
        }
    }
}
/* Find the next token in buf at or after offset i.
   Spaces, tabs, single-line parenthesised comments and line breaks
   followed by a space or tab are skipped first.  The token is the
   following run of characters with codes strictly between 32 and 127,
   stopping at any of the separator characters listed below (so it may be
   empty).
   On success stores the token's start in *x_return and the offset just
   past it in *y_return, and returns that end offset.  Returns -1 if a
   comment contains a CR or LF, or if a line break is not followed by a
   space or tab. */
static int
getNextToken(const char *restrict buf, int i, int *x_return, int *y_return)
{
    int x, y;
 again:
    while(buf[i] == ' ' || buf[i] == '\t')
        i++;
    if(buf[i] == '(') {
        i++;
        while(buf[i] != ')') {
            if(buf[i] == '\n' || buf[i] == '\r')
                return -1;
            /* Advance the index, not the base pointer: every offset
               handed back to the caller is relative to the original
               buf. */
            if(buf[i] == '\\' && buf[i + 1] != '\n' && buf[i + 1] != '\r')
                i += 2;
            else
                i++;
        }
        /* Step over the closing parenthesis before rescanning. */
        i++;
        goto again;
    }
    if(buf[i] == '\n') {
        if(buf[i + 1] == ' ' || buf[i + 1] == '\t') {
            i += 2;
            goto again;
        } else {
            return -1;
        }
    }
    if(buf[i] == '\r') {
        if(buf[i + 1] == '\n' && (buf[i + 2] == ' ' || buf[i + 2] == '\t')) {
            i += 3;
            goto again;
        } else {
            return -1;
        }
    }
    x = i;
    while(buf[i] > 32 && buf[i] < 127) {
        switch(buf[i]) {
        case '(': case ')': case '<': case '>': case '@':
        case ',': case ';': case ':': case '\\': case '/':
        case '[': case ']': case '?': case '=':
        case '{': case '}':
            goto out;
        default:
            i++;
        }
    }
 out:
    y = i;

    *x_return = x;
    *y_return = y;

    return y;
}
/* Parse an entity tag of the form ["W/"] '"' text '"' at or after
   offset i, skipping leading spaces and tabs.
   On success stores the offsets of the text between the quotes in
   *x_return (start) and *y_return (end, exclusive), stores 1 in
   *weak_return if the "W/" prefix was present and 0 otherwise, and
   returns the offset just past the closing quote.
   Returns -1 if no opening quote is found or if the line ends before the
   closing quote. */
static int
getNextETag(const char * restrict buf, int i,
            int *x_return, int *y_return, int *weak_return)
{
    int weak = 0;
    int x, y;
    while(buf[i] == ' ' || buf[i] == '\t')
        i++;
    if(buf[i] == 'W' && buf[i + 1] == '/') {
        weak = 1;
        i += 2;
    }
    if(buf[i] == '"')
        i++;
    else
        return -1;

    x = i;
    while(buf[i] != '"') {
        /* Either line terminator ends the search; the previous "&&" test
           could never be true, so an unterminated tag ran past the end
           of the line. */
        if(buf[i] == '\r' || buf[i] == '\n')
            return -1;
        i++;
    }
    y = i;
    i++;

    *x_return = x;
    *y_return = y;
    *weak_return = weak;
    return i;
}
/* Parse one element of a comma-separated list of tokens, each optionally
   followed by "=value", starting at offset i.
   Outputs:
     *x_return, *y_return  start and end (exclusive) of the token;
     *z_return, *t_return  start and end (exclusive) of the value with
                           trailing spaces and tabs trimmed, or -1 for
                           both when there is no "=value" part (either
                           pointer may be NULL);
     *end_return           1 if the element was terminated by a line break
                           that is not followed by a space or tab,
                           0 otherwise.
   Returns the offset at which to look for the next element, or -1 on a
   syntax error. */
static int
getNextTokenInList(const char *restrict buf, int i,
                   int *x_return, int *y_return,
                   int *z_return, int *t_return,
                   int *end_return)
{
    int j, x, y, z = -1, t = -1, end;
    j = getNextToken(buf, i, &x, &y);
    if(j < 0)
        return -1;
    while(buf[j] == ' ' || buf[j] == '\t')
        j++;

    if(buf[j] == '=') {
        j++;
        while(buf[j] == ' ' || buf[j] == '\t')
            j++;
        z = j;
        while(buf[j] != ',' && buf[j] != '\n' && buf[j] != '\r')
            j++;
        /* Record where the value ends; previously t was never assigned,
           so callers always received -1. */
        t = j;
        while(t > z && (buf[t - 1] == ' ' || buf[t - 1] == '\t'))
            t--;
    }

    if(buf[j] == '\n' || buf[j] == '\r') {
        if(buf[j] == '\r') {
            if(buf[j + 1] != '\n')
                return -1;
            j += 2;
        } else
            j++;
        end = 1;
        /* A following space or tab means the list goes on. */
        if(buf[j] == ' ' || buf[j] == '\t') {
            while(buf[j] == ' ' || buf[j] == '\t')
                j++;
            end = 0;
        }
    } else if(buf[j] == ',') {
        j++;
        while(buf[j] == ' ' || buf[j] == '\t')
            j++;
        end = 0;
    } else {
        return -1;
    }

    *x_return = x;
    *y_return = y;
    if(z_return)
        *z_return = z;
    if(t_return)
        *t_return = t;
    *end_return = end;
    return j;
}
/* Return non-zero when strcasecmp_n() reports s equal to the
   (end - start) characters of buf beginning at start, zero otherwise. */
static inline int
token_compare(const char *buf, int start, int end, const char *s)
{
    const char *token = buf + start;
    int token_length = end - start;

    return (strcasecmp_n(s, token, token_length) == 0);
}
/* Skip optional spaces followed by a line terminator (LF or CRLF)
   starting at offset i.
   Returns the offset just past the terminator, or -1 if anything other
   than spaces precedes it or a CR is not followed by LF. */
static int
skipEol(const char *restrict buf, int i)
{
    int pos = i;

    for(; buf[pos] == ' '; pos++)
        ;

    if(buf[pos] == '\n')
        return pos + 1;
    if(buf[pos] == '\r' && buf[pos + 1] == '\n')
        return pos + 2;
    return -1;
}
/* Advance from offset i to the next CR or LF.
   On success stores the offset of the terminator's first character in
   *start_return and returns the offset just past the terminator (LF or
   CRLF).  Returns -1, without writing *start_return, when a CR is not
   followed by LF. */
static int
skipToEol(const char *restrict buf, int i, int *start_return)
{
    int pos = i;
    int width;

    for(; buf[pos] != '\n' && buf[pos] != '\r'; pos++)
        ;

    if(buf[pos] == '\n')
        width = 1;
    else if(buf[pos + 1] == '\n')
        width = 2;
    else
        return -1;

    *start_return = pos;
    return pos + width;
}
/* Delimit a header value that begins at or after offset start.
   Leading spaces and tabs are skipped.  The value extends over every
   following line that begins with a space or tab, and ends at the
   terminator of the last such line.
   On success stores the value's start in *value_start_return and the
   offset of that final line terminator in *value_end_return, and returns
   the offset of the line after the value.  Returns -1 if skipToEol()
   fails. */
static int
getHeaderValue(const char *restrict buf, int start,
               int *value_start_return, int *value_end_return)
{
    int scan, next_line, line_end;

    while(buf[start] == ' ' || buf[start] == '\t')
        start++;

    scan = start;
    for(;;) {
        next_line = skipToEol(buf, scan, &line_end);
        if(next_line < 0)
            return -1;
        /* Stop unless the next line starts with a space or tab. */
        if(buf[next_line] != ' ' && buf[next_line] != '\t')
            break;
        scan = next_line + 1;
    }

    *value_start_return = start;
    *value_end_return = line_end;
    return next_line;
}
395 httpParseClientFirstLine(const char *restrict buf, int offset,
396 int *method_return,
397 AtomPtr *url_return,
398 int *version_return)
400 int i = 0;
401 int x, y;
402 int method;
403 AtomPtr url;
404 int version = HTTP_UNKNOWN;
405 int eol;
407 i = offset;
408 i = getNextWord(buf, i, &x, &y);
409 if(i < 0) return -1;
410 if(y == x + 3 && memcmp(buf + x, "GET", 3) == 0)
411 method = METHOD_GET;
412 else if(y == x + 4 && memcmp(buf + x, "HEAD", 4) == 0)
413 method = METHOD_HEAD;
414 else if(y == x + 4 && memcmp(buf + x, "POST", 4) == 0)
415 method = METHOD_POST;
416 else if(y == x + 3 && memcmp(buf + x, "PUT", 3) == 0)
417 method = METHOD_PUT;
418 else if(y == x + 7 && memcmp(buf + x, "CONNECT", 7) == 0)
419 method = METHOD_CONNECT;
420 else
421 method = METHOD_UNKNOWN;
423 i = getNextWord(buf, y + 1, &x, &y);
424 if(i < 0) return -1;
426 url = internAtomN(buf + x, y - x);
428 i = getNextWord(buf, y + 1, &x, &y);
429 if(i < 0) {
430 releaseAtom(url);
431 return -1;
434 if(y == x + 8) {
435 if(memcmp(buf + x, "HTTP/1.", 7) != 0)
436 version = HTTP_UNKNOWN;
437 else if(buf[x + 7] == '0')
438 version = HTTP_10;
439 else if(buf[x + 7] >= '1' && buf[x + 7] <= '9')
440 version = HTTP_11;
441 else
442 version = HTTP_UNKNOWN;
445 eol = skipEol(buf, y);
446 if(eol < 0) return -1;
448 *method_return = method;
449 if(url_return)
450 *url_return = url;
451 else
452 releaseAtom(url);
453 *version_return = version;
454 return eol;
458 httpParseServerFirstLine(const char *restrict buf,
459 int *status_return,
460 int *version_return,
461 AtomPtr *message_return)
463 int i = 0;
464 int x, y, eol;
465 int status;
466 int version = HTTP_UNKNOWN;
468 i = getNextWord(buf, 0, &x, &y);
469 if(i < 0)
470 return -1;
471 if(y == x + 8 && memcmp(buf + x, "HTTP/1.0", 8) == 0)
472 version = HTTP_10;
473 else if(y >= x + 8 && memcmp(buf + x, "HTTP/1.", 7) == 0)
474 version = HTTP_11;
475 else
476 version = HTTP_UNKNOWN;
478 i = getNextWord(buf, y + 1, &x, &y);
479 if(i < 0) return -1;
480 if(y == x + 3)
481 status = atol(buf + x);
482 else return -1;
484 i = skipToEol(buf, y, &eol);
485 if(i < 0) return -1;
487 *status_return = status;
488 *version_return = version;
489 if(message_return) {
490 /* Netscape enterprise bug */
491 if(eol > y)
492 *message_return = internAtomN(buf + y + 1, eol - y - 1);
493 else
494 *message_return = internAtom("No message");
496 return i;
/* Parse a run of decimal digits beginning at offset start.
   On success stores the value in *val_return and returns the offset of
   the first character after the digits.  Returns -1 if buf[start] is not
   a digit.  No overflow check is performed. */
static int
parseInt(const char *restrict buf, int start, int *val_return)
{
    int pos;
    int value = 0;

    if(!digit(buf[start]))
        return -1;

    for(pos = start; digit(buf[pos]); pos++)
        value = value * 10 + (buf[pos] - '0');

    *val_return = value;
    return pos;
}
/* Parse one header line starting at offset start and return the offset
   of the following line.
   *name_start_return is set to -1 when the line is empty (end of
   headers) and to -2 when the line could not be parsed; in both cases
   the other outputs are left untouched.  Otherwise the four outputs
   delimit the header name and the header value. */
static int
parseHeaderLine(const char *restrict buf, int start,
                int *name_start_return, int *name_end_return,
                int *value_start_return, int *value_end_return)
{
    int pos;
    int n_start, n_end, v_start, v_end;

    /* An empty line terminates the header block. */
    if(buf[start] == '\n') {
        *name_start_return = -1;
        return start + 1;
    }
    if(buf[start] == '\r' && buf[start + 1] == '\n') {
        *name_start_return = -1;
        return start + 2;
    }

    pos = getNextToken(buf, start, &n_start, &n_end);
    if(pos < 0 || buf[pos] != ':')
        goto syntax;

    for(pos++; buf[pos] == ' ' || buf[pos] == '\t'; pos++)
        ;

    pos = getHeaderValue(buf, pos, &v_start, &v_end);
    if(pos < 0)
        goto syntax;

    *name_start_return = n_start;
    *name_end_return = n_end;
    *value_start_return = v_start;
    *value_end_return = v_end;
    return pos;

 syntax:
    /* Resynchronise just past the next LF or CRLF. */
    for(pos = start; ; pos++) {
        if(buf[pos] == '\n') {
            pos += 1;
            break;
        }
        if(buf[pos] == '\r' && buf[pos + 1] == '\n') {
            pos += 2;
            break;
        }
    }
    *name_start_return = -2;
    return pos;
}
/* Scan buf[from..to) for two consecutive line terminators (LF or CRLF),
   i.e. the empty line that follows a header block.
   Returns -1 if none is found.  Otherwise:
     - when the second terminator is a bare LF, returns the offset of the
       first terminator and stores the offset just past the second one in
       *body_return;
     - when the second terminator is CRLF, returns the offset just past
       the second terminator and stores the offset of the first one in
       *body_return.
   NOTE(review): the two branches hand back the two offsets in opposite
   roles; confirm against the callers which one is intended.
   A terminator at offset 0 is not remembered, because 0 doubles as the
   "no terminator seen" marker. */
int
findEndOfHeaders(const char *restrict buf, int from, int to, int *body_return)
{
    int prev_eol = 0;
    int pos = from;

    while(pos < to) {
        if(buf[pos] == '\n') {
            if(prev_eol != 0) {
                *body_return = pos + 1;
                return prev_eol;
            }
            prev_eol = pos;
            pos += 1;
            continue;
        }
        if(buf[pos] == '\r' && pos < to - 1 && buf[pos + 1] == '\n') {
            if(prev_eol != 0) {
                *body_return = prev_eol;
                return pos + 2;
            }
            prev_eol = pos;
            pos += 2;
            continue;
        }
        /* Any other character (including a lone CR) breaks the run. */
        prev_eol = 0;
        pos += 1;
    }
    return -1;
}
/* Parse a Content-Range value of the form
       bytes FROM-TO/LENGTH | bytes FROM-TO/x | bytes x/LENGTH
   (where x is a literal asterisk) starting at offset i.
   On success stores:
     *from_return      FROM, or 0 when the range is an asterisk;
     *to_return        TO + 1, or -1 when the range is an asterisk;
     *full_len_return  LENGTH, or -1 when the length is an asterisk;
   and returns the offset just past the parsed value.
   Returns -1 on a syntax error or if the value is not followed by an end
   of line. */
static int
parseContentRange(const char *restrict buf, int i,
                  int *from_return, int *to_return, int *full_len_return)
{
    int j;
    int from, to, full_len;

    i = skipWhitespace(buf, i);
    if(i < 0) return -1;
    if(!token_compare(buf, i, i + 5, "bytes"))
        return -1;
    i += 5;
    i = skipWhitespace(buf, i);
    /* skipWhitespace can fail on a malformed comment; don't index buf
       with -1. */
    if(i < 0) return -1;
    if(buf[i] == '*') {
        from = 0;
        to = -1;
        i++;
    } else {
        i = parseInt(buf, i, &from);
        if(i < 0) return -1;
        if(buf[i] != '-') return -1;
        i++;
        i = parseInt(buf, i, &to);
        if(i < 0) return -1;
        to = to + 1;
    }
    if(buf[i] != '/')
        return -1;
    i++;
    if(buf[i] == '*') {
        full_len = -1;
        /* Consume the asterisk; otherwise the end-of-line check below
           always rejected an unknown instance length. */
        i++;
    } else {
        i = parseInt(buf, i, &full_len);
        if(i < 0) return -1;
    }
    j = skipEol(buf, i);
    if(j < 0)
        return -1;

    *from_return = from;
    *to_return = to;
    *full_len_return = full_len;
    return i;
}
/* Parse a Range value of the form "bytes=FROM-TO", "bytes=FROM-" or
   "bytes=-TO" starting at offset i.
   On success stores:
     *from_return  FROM, or 0 when FROM is omitted;
     *to_return    TO + 1, or -1 when TO is omitted;
   and returns the offset just past the parsed value.
   Returns -1 on a syntax error or if the value is not followed by an end
   of line. */
static int
parseRange(const char *restrict buf, int i,
           int *from_return, int *to_return)
{
    int j;
    int from, to;

    i = skipWhitespace(buf, i);
    if(i < 0)
        return -1;
    if(!token_compare(buf, i, i + 6, "bytes="))
        return -1;
    i += 6;
    i = skipWhitespace(buf, i);
    /* skipWhitespace can fail on a malformed comment; don't index buf
       with -1. */
    if(i < 0)
        return -1;
    if(buf[i] == '-') {
        from = 0;
    } else {
        i = parseInt(buf, i, &from);
        if(i < 0) return -1;
    }
    if(buf[i] != '-')
        return -1;
    i++;
    j = parseInt(buf, i, &to);
    if(j < 0)
        to = -1;
    else {
        to = to + 1;
        i = j;
    }
    j = skipEol(buf, i);
    if(j < 0) return -1;
    *from_return = from;
    *to_return = to;
    return i;
}
/* Return 1 if the two URLs name the same host, 0 otherwise.
   Both URLs must be at least 7 characters long and have "://" at
   offsets 4..6; the first four characters themselves are not compared.
   The host part runs from offset 7 up to the first '/' or the end of the
   URL, and is compared after OR-ing each character with 0x20. */
static int
urlSameHost(const char *url1, int len1, const char *url2, int len2)
{
    int pos;
    int host1_done, host2_done;

    if(len1 < 7 || len2 < 7)
        return 0;
    if(memcmp(url1 + 4, "://", 3) != 0)
        return 0;
    if(memcmp(url2 + 4, "://", 3) != 0)
        return 0;

    for(pos = 7; pos < len1 && pos < len2; pos++) {
        if(url1[pos] == '/' || url2[pos] == '/')
            break;
        if((url1[pos] | 0x20) != (url2[pos] | 0x20))
            break;
    }

    /* Both hosts must end at the position where the scan stopped. */
    host1_done = (pos == len1 || url1[pos] == '/');
    host2_done = (pos == len2 || url2[pos] == '/');
    return (host1_done && host2_done) ? 1 : 0;
}
/* Double the capacity of the header buffer hbuf, whose current capacity
   is *size.
   hbuf_small is the caller's fixed buffer: when hbuf still points at it,
   a new heap block is allocated and the first *size bytes are copied
   over; otherwise hbuf is grown with realloc().
   On success *size is updated and the new buffer is returned.  On
   failure (doubling does not increase the size, or allocation fails)
   hbuf is freed unless it is hbuf_small, *size is set to 0 and NULL is
   returned. */
static char *
resize_hbuf(char *hbuf, int *size, char *hbuf_small)
{
    int old_size = *size;
    int doubled = 2 * old_size;
    char *grown = NULL;

    if(doubled > old_size) {
        if(hbuf != hbuf_small) {
            grown = realloc(hbuf, doubled);
        } else {
            grown = malloc(doubled);
            if(grown != NULL)
                memcpy(grown, hbuf, old_size);
        }
    }

    if(grown != NULL) {
        *size = doubled;
        return grown;
    }

    if(hbuf != hbuf_small)
        free(hbuf);
    *size = 0;
    return NULL;
}
730 httpParseHeaders(int client, AtomPtr url,
731 const char *buf, int start, HTTPRequestPtr request,
732 AtomPtr *headers_return,
733 int *len_return, CacheControlPtr cache_control_return,
734 HTTPConditionPtr *condition_return, int *te_return,
735 time_t *date_return, time_t *last_modified_return,
736 time_t *expires_return, time_t *polipo_age_return,
737 time_t *polipo_access_return, int *polipo_body_offset_return,
738 int *age_return, char **etag_return, AtomPtr *expect_return,
739 HTTPRangePtr range_return, HTTPRangePtr content_range_return,
740 char **location_return, AtomPtr *via_return,
741 AtomPtr *auth_return)
743 int local = url ? urlIsLocal(url->string, url->length) : 0;
744 char hbuf_small[512];
745 char *hbuf = hbuf_small;
746 int hbuf_size = 512, hbuf_length = 0;
747 int i, j,
748 name_start, name_end, value_start, value_end,
749 token_start, token_end, end;
750 AtomPtr name = NULL;
751 time_t date = -1, last_modified = -1, expires = -1, polipo_age = -1,
752 polipo_access = -1, polipo_body_offset = -1;
753 int len = -1;
754 CacheControlRec cache_control;
755 char *endptr;
756 int te = TE_IDENTITY;
757 int age = -1;
758 char *etag = NULL, *ifrange = NULL;
759 int persistent = (!request || (request->connection->version != HTTP_10));
760 char *location = NULL;
761 AtomPtr via = NULL;
762 AtomPtr auth = NULL;
763 AtomPtr expect = NULL;
764 HTTPConditionPtr condition;
765 time_t ims = -1, inms = -1;
766 char *im = NULL, *inm = NULL;
767 AtomListPtr hopToHop = NULL;
768 HTTPRangeRec range = {-1, -1, -1}, content_range = {-1, -1, -1};
769 int haveCacheControl = 0;
771 #define RESIZE_HBUF() \
772 do { \
773 hbuf = resize_hbuf(hbuf, &hbuf_size, hbuf_small); \
774 if(hbuf == NULL) \
775 goto fail; \
776 } while(0)
778 cache_control.flags = 0;
779 cache_control.max_age = -1;
780 cache_control.s_maxage = -1;
781 cache_control.min_fresh = -1;
782 cache_control.max_stale = -1;
784 i = start;
786 while(1) {
787 i = parseHeaderLine(buf, i,
788 &name_start, &name_end, &value_start, &value_end);
789 if(i < 0) {
790 do_log(L_ERROR, "Couldn't find end of header line.\n");
791 goto fail;
794 if(name_start == -1)
795 break;
797 if(name_start < 0)
798 continue;
800 name = internAtomLowerN(buf + name_start, name_end - name_start);
802 if(name == atomConnection) {
803 j = getNextTokenInList(buf, value_start,
804 &token_start, &token_end, NULL, NULL,
805 &end);
806 while(1) {
807 if(j < 0) {
808 do_log(L_ERROR, "Couldn't parse Connection: ");
809 do_log_n(L_ERROR, buf + value_start,
810 value_end - value_start);
811 do_log(L_ERROR, ".\n");
812 goto fail;
814 if(token_compare(buf, token_start, token_end, "close")) {
815 persistent = 0;
816 } else if(token_compare(buf, token_start, token_end,
817 "keep-alive")) {
818 persistent = 1;
819 } else {
820 if(hopToHop == NULL)
821 hopToHop = makeAtomList(NULL, 0);
822 if(hopToHop == NULL) {
823 do_log(L_ERROR, "Couldn't allocate atom list.\n");
824 goto fail;
826 atomListCons(internAtomLowerN(buf + token_start,
827 token_end - token_start),
828 hopToHop);
830 if(end)
831 break;
832 j = getNextTokenInList(buf, j,
833 &token_start, &token_end, NULL, NULL,
834 &end);
836 } else if(name == atomCacheControl)
837 haveCacheControl = 1;
839 releaseAtom(name);
840 name = NULL;
843 i = start;
845 while(1) {
846 i = parseHeaderLine(buf, i,
847 &name_start, &name_end, &value_start, &value_end);
848 if(i < 0) {
849 do_log(L_ERROR, "Couldn't find end of header line.\n");
850 goto fail;
853 if(name_start == -1)
854 break;
856 if(name_start < 0) {
857 do_log(L_WARN, "Couldn't parse header line.\n");
858 if(laxHttpParser)
859 continue;
860 else
861 goto fail;
864 name = internAtomLowerN(buf + name_start, name_end - name_start);
866 if(name == atomProxyConnection) {
867 j = getNextTokenInList(buf, value_start,
868 &token_start, &token_end, NULL, NULL,
869 &end);
870 while(1) {
871 if(j < 0) {
872 do_log(L_WARN, "Couldn't parse Proxy-Connection:");
873 do_log_n(L_WARN, buf + value_start,
874 value_end - value_start);
875 do_log(L_WARN, ".\n");
876 persistent = 0;
877 break;
879 if(token_compare(buf, token_start, token_end, "close")) {
880 persistent = 0;
881 } else if(token_compare(buf, token_start, token_end,
882 "keep-alive")) {
883 persistent = 1;
885 if(end)
886 break;
887 j = getNextTokenInList(buf, j,
888 &token_start, &token_end, NULL, NULL,
889 &end);
891 } else if(name == atomContentLength) {
892 j = skipWhitespace(buf, value_start);
893 if(j < 0) {
894 do_log(L_WARN, "Couldn't parse Content-Length: \n");
895 do_log_n(L_WARN, buf + value_start, value_end - value_start);
896 do_log(L_WARN, ".\n");
897 len = -1;
898 } else {
899 len = strtol(buf + value_start, &endptr, 10);
900 if(endptr <= buf + value_start) {
901 do_log(L_WARN, "Couldn't parse Content-Length: \n");
902 do_log_n(L_WARN, buf + value_start,
903 value_end - value_start);
904 do_log(L_WARN, ".\n");
905 len = -1;
908 } else if((!local && name == atomProxyAuthorization) ||
909 (local && name == atomAuthorization)) {
910 if(auth_return) {
911 auth = internAtomN(buf + value_start, value_end - value_start);
912 if(auth == NULL) {
913 do_log(L_ERROR, "Couldn't allocate authorization.\n");
914 goto fail;
917 } else if(name == atomReferer) {
918 int h;
919 if(censorReferer == 0 ||
920 (censorReferer == 1 && url != NULL &&
921 urlSameHost(url->string, url->length,
922 buf + value_start, value_end - value_start))) {
923 while(hbuf_length > hbuf_size - 2)
924 RESIZE_HBUF();
925 hbuf[hbuf_length++] = '\r';
926 hbuf[hbuf_length++] = '\n';
927 do {
928 h = snnprint_n(hbuf, hbuf_length, hbuf_size,
929 buf + name_start, value_end - name_start);
930 if(h < 0) RESIZE_HBUF();
931 } while(h < 0);
932 hbuf_length = h;
934 } else if(name == atomTrailer || name == atomUpgrade) {
935 do_log(L_ERROR, "Trailers or upgrade present.\n");
936 goto fail;
937 } else if(name == atomDate || name == atomExpires ||
938 name == atomIfModifiedSince ||
939 name == atomIfUnmodifiedSince ||
940 name == atomLastModified ||
941 name == atomXPolipoDate || name == atomXPolipoAccess) {
942 time_t t;
943 j = parse_time(buf, value_start, value_end, &t);
944 if(j < 0) {
945 if(name != atomExpires) {
946 do_log(L_WARN, "Couldn't parse %s: ", name->string);
947 do_log_n(L_WARN, buf + value_start,
948 value_end - value_start);
949 do_log(L_WARN, "\n");
951 t = -1;
953 if(name == atomDate) {
954 if(t >= 0)
955 date = t;
956 } else if(name == atomExpires) {
957 if(t >= 0)
958 expires = t;
959 else
960 expires = 0;
961 } else if(name == atomLastModified)
962 last_modified = t;
963 else if(name == atomIfModifiedSince)
964 ims = t;
965 else if(name == atomIfUnmodifiedSince)
966 inms = t;
967 else if(name == atomXPolipoDate)
968 polipo_age = t;
969 else if(name == atomXPolipoAccess)
970 polipo_access = t;
971 } else if(name == atomAge) {
972 j = skipWhitespace(buf, value_start);
973 if(j < 0) {
974 age = -1;
975 } else {
976 age = strtol(buf + value_start, &endptr, 10);
977 if(endptr <= buf + value_start)
978 age = -1;
980 if(age < 0) {
981 do_log(L_WARN, "Couldn't parse age: \n");
982 do_log_n(L_WARN, buf + value_start, value_end - value_start);
983 do_log(L_WARN, " -- ignored.\n");
985 } else if(name == atomXPolipoBodyOffset) {
986 j = skipWhitespace(buf, value_start);
987 if(j < 0) {
988 do_log(L_ERROR, "Couldn't parse body offset.\n");
989 goto fail;
990 } else {
991 polipo_body_offset = strtol(buf + value_start, &endptr, 10);
992 if(endptr <= buf + value_start) {
993 do_log(L_ERROR, "Couldn't parse body offset.\n");
994 goto fail;
997 } else if(name == atomTransferEncoding) {
998 if(token_compare(buf, value_start, value_end, "identity"))
999 te = TE_IDENTITY;
1000 else if(token_compare(buf, value_start, value_end, "chunked"))
1001 te = TE_CHUNKED;
1002 else
1003 te = TE_UNKNOWN;
1004 } else if(name == atomETag ||
1005 name == atomIfNoneMatch || name == atomIfMatch ||
1006 name == atomIfRange) {
1007 int x, y;
1008 int weak;
1009 char *e;
1010 j = getNextETag(buf, value_start, &x, &y, &weak);
1011 if(j < 0) {
1012 if(buf[value_start] != '\r' && buf[value_start] != '\n')
1013 do_log(L_ERROR, "Couldn't parse ETag.\n");
1014 } else if(weak) {
1015 do_log(L_WARN, "Server returned weak ETag -- ignored.\n");
1016 } else {
1017 e = strdup_n(buf + x, y - x);
1018 if(e == NULL) goto fail;
1019 if(name == atomETag) {
1020 if(!etag)
1021 etag = e;
1022 else
1023 free(e);
1024 } else if(name == atomIfNoneMatch) {
1025 if(!inm)
1026 inm = e;
1027 else
1028 free(e);
1029 } else if(name == atomIfMatch) {
1030 if(!im)
1031 im = e;
1032 else
1033 free(e);
1034 } else if(name == atomIfRange) {
1035 if(!ifrange)
1036 ifrange = e;
1037 else
1038 free(e);
1039 } else {
1040 abort();
1043 } else if(name == atomCacheControl) {
1044 int v_start, v_end;
1045 j = getNextTokenInList(buf, value_start,
1046 &token_start, &token_end,
1047 &v_start, &v_end,
1048 &end);
1049 while(1) {
1050 if(j < 0) {
1051 do_log(L_WARN, "Couldn't parse Cache-Control.\n");
1052 cache_control.flags |= CACHE_NO;
1053 break;
1055 if(token_compare(buf, token_start, token_end, "no-cache")) {
1056 cache_control.flags |= CACHE_NO;
1057 } else if(token_compare(buf, token_start, token_end,
1058 "public")) {
1059 cache_control.flags |= CACHE_PUBLIC;
1060 } else if(token_compare(buf, token_start, token_end,
1061 "private")) {
1062 cache_control.flags |= CACHE_PRIVATE;
1063 } else if(token_compare(buf, token_start, token_end,
1064 "no-store")) {
1065 cache_control.flags |= CACHE_NO_STORE;
1066 } else if(token_compare(buf, token_start, token_end,
1067 "no-transform")) {
1068 cache_control.flags |= CACHE_NO_TRANSFORM;
1069 } else if(token_compare(buf, token_start, token_end,
1070 "must-revalidate") ||
1071 token_compare(buf, token_start, token_end,
1072 "must-validate")) { /* losers */
1073 cache_control.flags |= CACHE_MUST_REVALIDATE;
1074 } else if(token_compare(buf, token_start, token_end,
1075 "proxy-revalidate")) {
1076 cache_control.flags |= CACHE_PROXY_REVALIDATE;
1077 } else if(token_compare(buf, token_start, token_end,
1078 "only-if-cached")) {
1079 cache_control.flags |= CACHE_ONLY_IF_CACHED;
1080 } else if(token_compare(buf, token_start, token_end,
1081 "max-age") ||
1082 token_compare(buf, token_start, token_end,
1083 "maxage")) { /* losers */
1084 int a;
1085 if(v_start <= 0 || !digit(buf[v_start])) {
1086 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1087 do_log_n(L_WARN, buf + token_start,
1088 (v_end >= 0 ? v_end : token_end) -
1089 token_start);
1090 do_log(L_WARN, "\n");
1092 a = atoi(buf + v_start);
1093 cache_control.max_age = a;
1094 } else if(token_compare(buf, token_start, token_end,
1095 "s-maxage")) {
1096 int a;
1097 if(v_start <= 0 || !digit(buf[v_start])) {
1098 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1099 do_log_n(L_WARN, buf + token_start,
1100 (v_end >= 0 ? v_end : token_end) -
1101 token_start);
1102 do_log(L_WARN, "\n");
1104 a = atoi(buf + v_start);
1105 cache_control.max_age = a;
1106 } else if(token_compare(buf, token_start, token_end,
1107 "min-fresh")) {
1108 int a;
1109 if(v_start <= 0 || !digit(buf[v_start])) {
1110 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1111 do_log_n(L_WARN, buf + token_start,
1112 (v_end >= 0 ? v_end : token_end) -
1113 token_start);
1114 do_log(L_WARN, "\n");
1116 a = atoi(buf + v_start);
1117 cache_control.max_age = a;
1118 } else if(token_compare(buf, token_start, token_end,
1119 "max-stale")) {
1120 int a;
1121 if(v_start <= 0 || !digit(buf[v_start])) {
1122 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1123 do_log_n(L_WARN, buf + token_start,
1124 (v_end >= 0 ? v_end : token_end) -
1125 token_start);
1126 do_log(L_WARN, "\n");
1128 a = atoi(buf + v_start);
1129 cache_control.max_stale = a;
1130 } else {
1131 do_log(L_WARN, "Unsupported Cache-Control directive ");
1132 do_log_n(L_WARN, buf + token_start,
1133 (v_end >= 0 ? v_end : token_end) - token_start);
1134 do_log(L_WARN, " -- ignored.\n");
1136 if(end)
1137 break;
1138 j = getNextTokenInList(buf, j,
1139 &token_start, &token_end,
1140 &v_start, &v_end,
1141 &end);
1143 } else if(name == atomContentRange) {
1144 if(!client) {
1145 j = parseContentRange(buf, value_start,
1146 &content_range.from, &content_range.to,
1147 &content_range.full_length);
1148 if(j < 0) {
1149 do_log(L_ERROR, "Couldn't parse Content-Range: ");
1150 do_log_n(L_ERROR, buf + value_start,
1151 value_end - value_start);
1152 do_log(L_ERROR, "\n");
1153 goto fail;
1155 } else {
1156 do_log(L_ERROR, "Content-Range from client.\n");
1157 goto fail;
1159 } else if(name == atomRange) {
1160 if(client) {
1161 j = parseRange(buf, value_start, &range.from, &range.to);
1162 if(j < 0) {
1163 do_log(L_WARN, "Couldn't parse Range -- ignored.\n");
1164 range.from = -1;
1165 range.to = -1;
1167 } else {
1168 do_log(L_WARN, "Range from server -- ignored\n");
1170 } else if(name == atomXPolipoLocation) {
1171 if(location_return) {
1172 location =
1173 strdup_n(buf + value_start, value_end - value_start);
1174 if(location == NULL) {
1175 do_log(L_ERROR, "Couldn't allocate location.\n");
1176 goto fail;
1179 } else if(name == atomVia) {
1180 if(via_return) {
1181 AtomPtr new_via, full_via;
1182 new_via =
1183 internAtomN(buf + value_start, value_end - value_start);
1184 if(new_via == NULL) {
1185 do_log(L_ERROR, "Couldn't allocate via.\n");
1186 goto fail;
1188 if(via) {
1189 full_via =
1190 internAtomF("%s, %s", via->string, new_via->string);
1191 releaseAtom(new_via);
1192 if(full_via == NULL) {
1193 do_log(L_ERROR, "Couldn't allocate via");
1194 goto fail;
1196 releaseAtom(via);
1197 via = full_via;
1198 } else {
1199 via = new_via;
1202 } else if(name == atomExpect) {
1203 if(expect_return) {
1204 expect = internAtomLowerN(buf + value_start,
1205 value_end - value_start);
1206 if(expect == NULL) {
1207 do_log(L_ERROR, "Couldn't allocate expect.\n");
1208 goto fail;
1211 } else {
1212 if(!client && name == atomContentType) {
1213 if(token_compare(buf, value_start, value_end,
1214 "multipart/byteranges")) {
1215 do_log(L_ERROR,
1216 "Server returned multipart/byteranges -- yuck!\n");
1217 goto fail;
1220 if(name == atomVary) {
1221 if(!token_compare(buf, value_start, value_end, "host") &&
1222 !token_compare(buf, value_start, value_end, "*")) {
1223 /* What other vary headers should be ignored? */
1224 do_log(L_VARY, "Vary header present (");
1225 do_log_n(L_VARY,
1226 buf + value_start, value_end - value_start);
1227 do_log(L_VARY, ").\n");
1229 cache_control.flags |= CACHE_VARY;
1230 } else if(name == atomAuthorization) {
1231 cache_control.flags |= CACHE_AUTHORIZATION;
1234 if(name == atomPragma) {
1235 /* Pragma is only defined for the client, and the only
1236 standard value is no-cache (RFC 1945, 10.12).
1237 However, we honour a Pragma: no-cache for both the client
1238 and the server when there's no Cache-Control header. In
1239 all cases, we pass the Pragma header to the next hop. */
1240 if(!haveCacheControl) {
1241 j = getNextTokenInList(buf, value_start,
1242 &token_start, &token_end, NULL, NULL,
1243 &end);
1244 while(1) {
1245 if(j < 0) {
1246 do_log(L_WARN, "Couldn't parse Pragma.\n");
1247 cache_control.flags |= CACHE_NO;
1248 break;
1250 if(token_compare(buf, token_start, token_end,
1251 "no-cache"))
1252 cache_control.flags = CACHE_NO;
1253 if(end)
1254 break;
1255 j = getNextTokenInList(buf, j, &token_start, &token_end,
1256 NULL, NULL, &end);
1260 if(!client &&
1261 (name == atomSetCookie ||
1262 name == atomCookie || name == atomCookie2))
1263 cache_control.flags |= CACHE_COOKIE;
1265 if(hbuf) {
1266 if(name != atomConnection && name != atomHost &&
1267 name != atomAcceptRange && name != atomTE &&
1268 name != atomProxyAuthenticate &&
1269 name != atomKeepAlive &&
1270 (!hopToHop || !atomListMember(name, hopToHop)) &&
1271 !atomListMember(name, censoredHeaders)) {
1272 int h;
1273 while(hbuf_length > hbuf_size - 2)
1274 RESIZE_HBUF();
1275 hbuf[hbuf_length++] = '\r';
1276 hbuf[hbuf_length++] = '\n';
1277 do {
1278 h = snnprint_n(hbuf, hbuf_length, hbuf_size,
1279 buf + name_start,
1280 value_end - name_start);
1281 if(h < 0) RESIZE_HBUF();
1282 } while(h < 0);
1283 hbuf_length = h;
1287 releaseAtom(name);
1288 name = NULL;
1291 if(headers_return) {
1292 AtomPtr pheaders = NULL;
1293 pheaders = internAtomN(hbuf, hbuf_length);
1294 if(!pheaders)
1295 goto fail;
1296 *headers_return = pheaders;
1298 if(hbuf != hbuf_small)
1299 free(hbuf);
1300 hbuf = NULL;
1301 hbuf_size = 0;
1303 if(request)
1304 if(!persistent)
1305 request->flags &= ~REQUEST_PERSISTENT;
1307 if(te != TE_IDENTITY) len = -1;
1308 if(len_return) *len_return = len;
1309 if(cache_control_return) *cache_control_return = cache_control;
1310 if(condition_return) {
1311 if(ims >= 0 || inms >= 0 || im || inm || ifrange) {
1312 condition = httpMakeCondition();
1313 if(condition) {
1314 condition->ims = ims;
1315 condition->inms = inms;
1316 condition->im = im;
1317 condition->inm = inm;
1318 condition->ifrange = ifrange;
1319 } else {
1320 do_log(L_ERROR, "Couldn't allocate condition.\n");
1321 if(im) free(im);
1322 if(inm) free(inm);
1324 } else {
1325 condition = NULL;
1327 *condition_return = condition;
1328 } else {
1329 assert(!im && !inm);
1332 if(te_return) *te_return = te;
1333 if(date_return) *date_return = date;
1334 if(last_modified_return) *last_modified_return = last_modified;
1335 if(expires_return) *expires_return = expires;
1336 if(polipo_age_return) *polipo_age_return = polipo_age;
1337 if(polipo_access_return) *polipo_access_return = polipo_access;
1338 if(polipo_body_offset_return)
1339 *polipo_body_offset_return = polipo_body_offset;
1340 if(age_return) *age_return = age;
1341 if(etag_return)
1342 *etag_return = etag;
1343 else {
1344 if(etag) free(etag);
1346 if(range_return) *range_return = range;
1347 if(content_range_return) *content_range_return = content_range;
1348 if(location_return) {
1349 *location_return = location;
1350 } else {
1351 if(location)
1352 free(location);
1354 if(via_return)
1355 *via_return = via;
1356 else {
1357 if(via)
1358 releaseAtom(via);
1360 if(expect_return)
1361 *expect_return = expect;
1362 else {
1363 if(expect)
1364 releaseAtom(expect);
1366 if(auth_return)
1367 *auth_return = auth;
1368 else {
1369 if(auth)
1370 releaseAtom(auth);
1372 if(hopToHop) destroyAtomList(hopToHop);
1373 return i;
1375 fail:
1376 if(hbuf && hbuf != hbuf_small) free(hbuf);
1377 if(name) releaseAtom(name);
1378 if(etag) free(etag);
1379 if(location) free(location);
1380 if(via) releaseAtom(via);
1381 if(expect) releaseAtom(expect);
1382 if(auth) releaseAtom(auth);
1383 if(hopToHop) destroyAtomList(hopToHop);
1385 return -1;
1386 #undef RESIZE_HBUF
1390 httpFindHeader(AtomPtr header, const char *headers, int hlen,
1391 int *value_begin_return, int *value_end_return)
1393 int len = header->length;
1394 int i = 0;
1396 while(i + len + 1 < hlen) {
1397 if(headers[i + len] == ':' &&
1398 lwrcmp(headers + i, header->string, len) == 0) {
1399 int j = i + len + 1, k;
1400 while(j < hlen && headers[j] == ' ')
1401 j++;
1402 k = j;
1403 while(k < hlen && headers[k] != '\n' && headers[k] != '\r')
1404 k++;
1405 *value_begin_return = j;
1406 *value_end_return = k;
1407 return 1;
1408 } else {
1409 while(i < hlen && headers[i] != '\n' && headers[i] != '\r')
1410 i++;
1411 i++;
1412 if(i < hlen && headers[i] == '\n')
1413 i++;
1416 return 0;
/*
 * Split the URL `url` (len bytes) into host and port.
 *
 * If the URL starts with "http://" (compared with lwrcmp):
 *   *x_return    = offset of the first byte of the host (always 7),
 *   *y_return    = offset just past the host,
 *   *port_return = the port parsed by atoi_n after a ':' following the
 *                  host, or 80 if there is no ':' or atoi_n fails,
 *   *z_return    = offset just past the host and (if parsed) the port.
 * A host starting with '[' is scanned as a bracketed literal (RFC 2732):
 * the scan stops after the closing ']' or at the first byte that is not
 * ':', a letter or a digit.
 *
 * Otherwise *x_return and *y_return are -1, *port_return is -1 and
 * *z_return is 0.
 *
 * Always returns 0.
 */
int
parseUrl(const char *url, int len,
         int *x_return, int *y_return, int *port_return, int *z_return)
{
    int host_start = -1, host_end = -1;
    int port = -1;
    int pos = 0;

    if(len >= 7 && lwrcmp(url, "http://", 7) == 0) {
        host_start = 7;
        pos = host_start;

        if(pos < len && url[pos] == '[') {
            /* RFC 2732 */
            pos++;
            while(pos < len) {
                if(url[pos] == ']') {
                    /* The closing bracket is part of the host. */
                    pos++;
                    break;
                }
                if(url[pos] != ':' && !letter(url[pos]) && !digit(url[pos]))
                    break;
                pos++;
            }
        } else {
            /* Plain host: runs up to the port separator or the path. */
            while(pos < len && url[pos] != ':' && url[pos] != '/')
                pos++;
        }
        host_end = pos;

        if(pos < len && url[pos] == ':') {
            int after_port = atoi_n(url, pos + 1, len, &port);
            if(after_port >= 0)
                pos = after_port;
            else
                port = 80;      /* unparsable port: use the default */
        } else {
            port = 80;
        }
    }

    *x_return = host_start;
    *y_return = host_end;
    *port_return = port;
    *z_return = pos;
    return 0;
}
/*
 * Return 1 if the URL `url` (len bytes) is non-empty and starts with
 * '/', and 0 otherwise.
 */
int
urlIsLocal(const char *url, int len)
{
    if(len <= 0)
        return 0;

    return url[0] == '/';
}
/*
 * Return 1 if the URL `url` (len bytes) begins with the byte sequence
 * "/polipo/" (exact, case-sensitive match), and 0 otherwise.
 */
int
urlIsSpecial(const char *url, int len)
{
    static const char prefix[] = "/polipo/";
    const int prefix_len = (int)sizeof(prefix) - 1;

    if(len < prefix_len)
        return 0;

    return memcmp(url, prefix, prefix_len) == 0;
}
1482 parseChunkSize(const char *restrict buf, int i, int end,
1483 int *chunk_size_return)
1485 int v, d;
1486 v = h2i(buf[i]);
1487 if(v < 0)
1488 return -1;
1490 i++;
1492 while(i < end) {
1493 d = h2i(buf[i]);
1494 if(d < 0)
1495 break;
1496 v = v * 16 + d;
1497 i++;
1500 while(i < end) {
1501 if(buf[i] == ' ' || buf[i] == '\t')
1502 i++;
1503 else
1504 break;
1507 if(i >= end - 1)
1508 return 0;
1510 if(buf[i] != '\r' || buf[i + 1] != '\n')
1511 return -1;
1513 i += 2;
1515 if(v == 0) {
1516 if(i >= end - 1)
1517 return 0;
1518 if(buf[i] != '\r') {
1519 do_log(L_ERROR, "Trailers present!\n");
1520 return -1;
1522 i++;
1523 if(buf[i] != '\n')
1524 return -1;
1525 i++;
1528 *chunk_size_return = v;
1529 return i;
1534 checkVia(AtomPtr name, AtomPtr via)
1536 int i;
1537 char *v;
1538 if(via == NULL || via->length == 0)
1539 return 1;
1541 v = via->string;
1543 i = 0;
1544 while(i < via->length) {
1545 while(v[i] == ' ' || v[i] == '\t' || v[i] == ',' ||
1546 v[i] == '\r' || v[i] == '\n' ||
1547 digit(v[i]) || v[i] == '.')
1548 i++;
1549 if(i + name->length > via->length)
1550 break;
1551 if(memcmp(v + i, name->string, name->length) == 0) {
1552 char c = v[i + name->length];
1553 if(c == '\0' || c == ' ' || c == '\t' || c == ',' ||
1554 c == '\r' || c == '\n')
1555 return 0;
1557 i++;
1558 while(letter(v[i]) || digit(v[i]) || v[i] == '.')
1559 i++;
1561 return 1;