Update CHANGES.
[polipo.git] / http_parse.c
blob671e3b609c93a9e5cf159373d7175da1a1cfe7dc
1 /*
2 Copyright (c) 2003-2006 by Juliusz Chroboczek
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
#include "polipo.h"

#include <limits.h>
25 static int getNextWord(const char *buf, int i, int *x_return, int *y_return);
26 static int getNextToken(const char *buf, int i, int *x_return, int *y_return);
27 static int getNextTokenInList(const char *buf, int i,
28 int *x_return, int *y_return,
29 int *z_return, int *t_return,
30 int *end_return);
32 static AtomPtr atomConnection, atomProxyConnection, atomContentLength,
33 atomHost, atomAcceptRange, atomTE,
34 atomReferer, atomProxyAuthenticate, atomProxyAuthorization,
35 atomKeepAlive, atomTrailers, atomUpgrade, atomDate, atomExpires,
36 atomIfModifiedSince, atomIfUnmodifiedSince, atomIfRange, atomLastModified,
37 atomIfMatch, atomIfNoneMatch, atomAge, atomTransferEncoding,
38 atomETag, atomCacheControl, atomPragma, atomContentRange, atomRange,
39 atomVia, atomVary, atomExpect, atomAuthorization,
40 atomSetCookie, atomCookie, atomCookie2,
41 atomXPolipoDate, atomXPolipoAccess, atomXPolipoLocation,
42 atomXPolipoBodyOffset;
44 AtomPtr atomContentType, atomContentEncoding;
46 int censorReferer = 0;
47 int laxHttpParser = 1;
49 static AtomListPtr censoredHeaders;
51 void
52 preinitHttpParser()
54 CONFIG_VARIABLE_SETTABLE(censorReferer, CONFIG_TRISTATE, configIntSetter,
55 "Censor referer headers.");
56 censoredHeaders = makeAtomList(NULL, 0);
57 if(censoredHeaders == NULL) {
58 do_log(L_ERROR, "Couldn't allocate censored atoms.\n");
59 exit(1);
61 CONFIG_VARIABLE(censoredHeaders, CONFIG_ATOM_LIST_LOWER,
62 "Headers to censor.");
63 CONFIG_VARIABLE_SETTABLE(laxHttpParser, CONFIG_BOOLEAN, configIntSetter,
64 "Ignore unknown HTTP headers.");
67 void
68 initHttpParser()
70 #define A(name, value) name = internAtom(value); if(!name) goto fail;
71 /* These must be in lower-case */
72 A(atomConnection, "connection");
73 A(atomProxyConnection, "proxy-connection");
74 A(atomContentLength, "content-length");
75 A(atomHost, "host");
76 A(atomAcceptRange, "accept-range");
77 A(atomTE, "te");
78 A(atomReferer, "referer");
79 A(atomProxyAuthenticate, "proxy-authenticate");
80 A(atomProxyAuthorization, "proxy-authorization");
81 A(atomKeepAlive, "keep-alive");
82 A(atomTrailers, "trailers");
83 A(atomUpgrade, "upgrade");
84 A(atomDate, "date");
85 A(atomExpires, "expires");
86 A(atomIfModifiedSince, "if-modified-since");
87 A(atomIfUnmodifiedSince, "if-unmodified-since");
88 A(atomIfRange, "if-range");
89 A(atomLastModified, "last-modified");
90 A(atomIfMatch, "if-match");
91 A(atomIfNoneMatch, "if-none-match");
92 A(atomAge, "age");
93 A(atomTransferEncoding, "transfer-encoding");
94 A(atomETag, "etag");
95 A(atomCacheControl, "cache-control");
96 A(atomPragma, "pragma");
97 A(atomContentRange, "content-range");
98 A(atomRange, "range");
99 A(atomVia, "via");
100 A(atomContentType, "content-type");
101 A(atomContentEncoding, "content-encoding");
102 A(atomVary, "vary");
103 A(atomExpect, "expect");
104 A(atomAuthorization, "authorization");
105 A(atomSetCookie, "set-cookie");
106 A(atomCookie, "cookie");
107 A(atomCookie2, "cookie2");
108 A(atomXPolipoDate, "x-polipo-date");
109 A(atomXPolipoAccess, "x-polipo-access");
110 A(atomXPolipoLocation, "x-polipo-location");
111 A(atomXPolipoBodyOffset, "x-polipo-body-offset");
112 #undef A
113 return;
115 fail:
116 do_log(L_ERROR, "Couldn't allocate atom.\n");
117 exit(1);
/* Find the next blank-delimited word in buf, starting at index i.
   Leading spaces (only ' ', not tabs) are skipped.  If the first
   non-space character is CR or LF there is no word and -1 is returned.
   Otherwise the word is the run of printable characters (codes 33..126);
   its start is stored in *x_return, the index just past it in *y_return,
   and 0 is returned.  The word may be empty (x == y). */
static int
getNextWord(const char *restrict buf, int i, int *x_return, int *y_return)
{
    int x, y;

    while(buf[i] == ' ')
        i++;
    if(buf[i] == '\n' || buf[i] == '\r')
        return -1;

    x = i;
    while(buf[i] > 32 && buf[i] < 127)
        i++;
    y = i;

    *x_return = x;
    *y_return = y;

    return 0;
}
/* Skip a parenthesised comment.  buf[i] must be '(' (asserted).
   Returns the index just past the matching ')', or -1 if a line ends inside
   the comment without being folded (i.e. LF or CRLF not followed by a space
   or tab, or a bare CR).  "\)" is treated as an escaped parenthesis.
   Nested comments are not tracked: the first unescaped ')' ends the
   comment. */
static int
skipComment(const char *restrict buf, int i)
{
    assert(buf[i] == '(');

    i++;
    while(1) {
        if(buf[i] == '\\' && buf[i + 1] == ')') {
            i += 2;
        } else if(buf[i] == ')') {
            return i + 1;
        } else if(buf[i] == '\n') {
            /* A folded line continues the comment. */
            if(buf[i + 1] == ' ' || buf[i + 1] == '\t')
                i += 2;
            else
                return -1;
        } else if(buf[i] == '\r') {
            if(buf[i + 1] != '\n')
                return -1;
            if(buf[i + 2] == ' ' || buf[i + 2] == '\t')
                i += 3;
            else
                return -1;
        } else {
            i++;
        }
    }
}
/* Skip linear whitespace starting at index i: spaces, tabs, parenthesised
   comments, and folded line breaks (LF or CRLF followed by a space or tab).
   Returns the index of the first character that is none of these -- which
   may be an unfolded LF or the CR of an unfolded CRLF -- or -1 if a comment
   could not be skipped. */
static int
skipWhitespace(const char *restrict buf, int i)
{
    while(1) {
        if(buf[i] == ' ' || buf[i] == '\t') {
            i++;
        } else if(buf[i] == '(') {
            i = skipComment(buf, i);
            if(i < 0)
                return -1;
        } else if(buf[i] == '\n') {
            if(buf[i + 1] == ' ' || buf[i + 1] == '\t')
                i += 2;
            else
                return i;
        } else if(buf[i] == '\r' && buf[i + 1] == '\n') {
            if(buf[i + 2] == ' ' || buf[i + 2] == '\t')
                i += 3;
            else
                return i;
        } else {
            return i;
        }
    }
}
/* Find the next token in buf, starting at index i.
   Spaces, tabs, single-line parenthesised comments and folded line breaks
   (LF or CRLF followed by a space or tab) before the token are skipped.
   Returns -1 if an unfolded line break is reached first, or if a line break
   occurs inside a comment.  Otherwise the token is the run of printable
   characters (codes 33..126) up to the first separator character; its start
   is stored in *x_return, the index just past it in *y_return, and that same
   end index is returned.  The token may be empty. */
static int
getNextToken(const char *restrict buf, int i, int *x_return, int *y_return)
{
    int x, y;
 again:
    while(buf[i] == ' ' || buf[i] == '\t')
        i++;
    if(buf[i] == '(') {
        i++;
        while(buf[i] != ')') {
            if(buf[i] == '\n' || buf[i] == '\r')
                return -1;
            /* Advance the index, not the buffer pointer: the offsets we
               return must stay relative to the caller's buf. */
            if(buf[i] == '\\' && buf[i + 1] != '\n' && buf[i + 1] != '\r')
                i += 2;
            else
                i++;
        }
        /* Step over the closing parenthesis so it is not taken for the
           (empty) token. */
        i++;
        goto again;
    }
    if(buf[i] == '\n') {
        if(buf[i + 1] == ' ' || buf[i + 1] == '\t') {
            i += 2;
            goto again;
        } else {
            return -1;
        }
    }
    if(buf[i] == '\r') {
        if(buf[i + 1] == '\n' && (buf[i + 2] == ' ' || buf[i + 2] == '\t')) {
            i += 3;
            goto again;
        } else {
            return -1;
        }
    }
    x = i;
    while(buf[i] > 32 && buf[i] < 127) {
        switch(buf[i]) {
        case '(': case ')': case '<': case '>': case '@':
        case ',': case ';': case ':': case '\\': case '/':
        case '[': case ']': case '?': case '=':
        case '{': case '}': case ' ': case '\t':
            goto out;
        default:
            i++;
        }
    }
 out:
    y = i;

    *x_return = x;
    *y_return = y;

    return y;
}
/* Parse an entity tag starting at index i.
   Leading spaces and tabs are skipped; an optional "W/" prefix marks the
   tag as weak.  The tag itself must be enclosed in double quotes.
   On success stores the index of the first character after the opening
   quote in *x_return, the index of the closing quote in *y_return, 1 or 0
   in *weak_return, and returns the index just past the closing quote.
   Returns -1 if there is no opening quote or if the line ends before the
   closing quote. */
static int
getNextETag(const char * restrict buf, int i,
            int *x_return, int *y_return, int *weak_return)
{
    int weak = 0;
    int x, y;
    while(buf[i] == ' ' || buf[i] == '\t')
        i++;
    if(buf[i] == 'W' && buf[i + 1] == '/') {
        weak = 1;
        i += 2;
    }
    if(buf[i] == '"')
        i++;
    else
        return -1;

    x = i;
    while(buf[i] != '"') {
        /* Either line terminator means the tag was never closed. */
        if(buf[i] == '\r' || buf[i] == '\n')
            return -1;
        i++;
    }
    y = i;
    i++;

    *x_return = x;
    *y_return = y;
    *weak_return = weak;
    return i;
}
/* Parse one element of a comma-separated list of "token" or "token=value"
   items, starting at index i.
   *x_return / *y_return receive the start and end of the token (as found
   by getNextToken).  If the token is followed by '=', *z_return receives
   the index of the first non-blank character of the value and *t_return
   the index of the delimiter (',' CR or LF) that ends it; otherwise both
   are -1.  z_return and t_return may be NULL.
   *end_return is set to 1 if the element is followed by a line break that
   is not folded (i.e. this was the last element), 0 if it is followed by a
   comma or by a folded line.
   Returns the index at which to look for the next element, or -1 on a
   syntax error. */
static int
getNextTokenInList(const char *restrict buf, int i,
                   int *x_return, int *y_return,
                   int *z_return, int *t_return,
                   int *end_return)
{
    int j, x, y, z = -1, t = -1, end;
    j = getNextToken(buf, i, &x, &y);
    if(j < 0)
        return -1;
    while(buf[j] == ' ' || buf[j] == '\t')
        j++;

    if(buf[j] == '=') {
        j++;
        while(buf[j] == ' ' || buf[j] == '\t')
            j++;
        z = j;
        while(buf[j] != ',' && buf[j] != '\n' && buf[j] != '\r')
            j++;
        /* Record where the value ends so callers get a usable range. */
        t = j;
    }

    if(buf[j] == '\n' || buf[j] == '\r') {
        if(buf[j] == '\r') {
            if(buf[j + 1] != '\n')
                return -1;
            j += 2;
        } else
            j++;
        end = 1;
        /* A line break followed by blanks is a fold: the list goes on. */
        if(buf[j] == ' ' || buf[j] == '\t') {
            while(buf[j] == ' ' || buf[j] == '\t')
                j++;
            end = 0;
        }
    } else if(buf[j] == ',') {
        j++;
        while(buf[j] == ' ' || buf[j] == '\t')
            j++;
        end = 0;
    } else {
        return -1;
    }

    *x_return = x;
    *y_return = y;
    if(z_return)
        *z_return = z;
    if(t_return)
        *t_return = t;
    *end_return = end;
    return j;
}
/* Return non-zero if strcasecmp_n reports the string s and the buffer
   slice buf[start..end) as equal, zero otherwise.
   NOTE(review): presumably a case-insensitive comparison of a
   NUL-terminated string against a counted one -- confirm against
   strcasecmp_n's definition. */
static inline int
token_compare(const char *buf, int start, int end, const char *s)
{
    return (strcasecmp_n(s, buf + start, end - start) == 0);
}
/* Skip optional spaces (not tabs) followed by a line terminator.
   Returns the index just past the LF or CRLF, or -1 if anything else
   (including a bare CR) is found. */
static int
skipEol(const char *restrict buf, int i)
{
    while(buf[i] == ' ')
        i++;
    if(buf[i] == '\n')
        return i + 1;
    else if(buf[i] == '\r') {
        if(buf[i + 1] == '\n')
            return i + 2;
        else
            return -1;
    } else {
        return -1;
    }
}
/* Advance from index i to the end of the current line.
   On success stores the index of the line terminator (the LF, or the CR of
   a CRLF) in *start_return and returns the index just past the terminator.
   Returns -1, leaving *start_return untouched, if a CR is not followed by
   LF.  The scan stops only at CR or LF, so buf must contain one. */
static int
skipToEol(const char *restrict buf, int i, int *start_return)
{
    while(buf[i] != '\n' && buf[i] != '\r')
        i++;
    if(buf[i] == '\n') {
        *start_return = i;
        return i + 1;
    } else if(buf[i] == '\r') {
        if(buf[i + 1] == '\n') {
            *start_return = i;
            return i + 2;
        } else {
            return -1;
        }
    }
    return -1;
}
/* Delimit a header value that begins at or after index start.
   Leading spaces and tabs are skipped.  The value extends over any folded
   continuation lines (lines that begin with a space or tab).
   On success stores the index of the first value character in
   *value_start_return, the index of the terminator of the value's last
   line in *value_end_return, and returns the index of the start of the
   next header line.  Returns -1 if a line is terminated by a bare CR. */
static int
getHeaderValue(const char *restrict buf, int start,
               int *value_start_return, int *value_end_return)
{
    int i, j, k;

    while(buf[start] == ' ' || buf[start] == '\t')
        start++;
    i = start;
 again:
    j = skipToEol(buf, i, &k);
    if(j < 0)
        return -1;
    if(buf[j] == ' ' || buf[j] == '\t') {
        /* Folded line: keep scanning from the continuation. */
        i = j + 1;
        goto again;
    }
    *value_start_return = start;
    *value_end_return = k;
    return j;
}
395 httpParseClientFirstLine(const char *restrict buf, int offset,
396 int *method_return,
397 AtomPtr *url_return,
398 int *version_return)
400 int i = 0;
401 int x, y;
402 int method;
403 AtomPtr url;
404 int version = HTTP_UNKNOWN;
405 int eol;
407 i = offset;
408 i = getNextWord(buf, i, &x, &y);
409 if(i < 0) return -1;
410 if(y == x + 3 && memcmp(buf + x, "GET", 3) == 0)
411 method = METHOD_GET;
412 else if(y == x + 4 && memcmp(buf + x, "HEAD", 4) == 0)
413 method = METHOD_HEAD;
414 else if(y == x + 4 && memcmp(buf + x, "POST", 4) == 0)
415 method = METHOD_POST;
416 else if(y == x + 3 && memcmp(buf + x, "PUT", 3) == 0)
417 method = METHOD_PUT;
418 else if(y == x + 7 && memcmp(buf + x, "CONNECT", 7) == 0)
419 method = METHOD_CONNECT;
420 else
421 method = METHOD_UNKNOWN;
423 i = getNextWord(buf, y + 1, &x, &y);
424 if(i < 0) return -1;
426 url = internAtomN(buf + x, y - x);
428 i = getNextWord(buf, y + 1, &x, &y);
429 if(i < 0) {
430 releaseAtom(url);
431 return -1;
434 if(y == x + 8) {
435 if(memcmp(buf + x, "HTTP/1.", 7) != 0)
436 version = HTTP_UNKNOWN;
437 else if(buf[x + 7] == '0')
438 version = HTTP_10;
439 else if(buf[x + 7] >= '1' && buf[x + 7] <= '9')
440 version = HTTP_11;
441 else
442 version = HTTP_UNKNOWN;
445 eol = skipEol(buf, y);
446 if(eol < 0) return -1;
448 *method_return = method;
449 if(url_return)
450 *url_return = url;
451 else
452 releaseAtom(url);
453 *version_return = version;
454 return eol;
458 httpParseServerFirstLine(const char *restrict buf,
459 int *status_return,
460 int *version_return,
461 AtomPtr *message_return)
463 int i = 0;
464 int x, y, eol;
465 int status;
466 int version = HTTP_UNKNOWN;
468 i = getNextWord(buf, 0, &x, &y);
469 if(i < 0)
470 return -1;
471 if(y == x + 8 && memcmp(buf + x, "HTTP/1.0", 8) == 0)
472 version = HTTP_10;
473 else if(y >= x + 8 && memcmp(buf + x, "HTTP/1.", 7) == 0)
474 version = HTTP_11;
475 else
476 version = HTTP_UNKNOWN;
478 i = getNextWord(buf, y + 1, &x, &y);
479 if(i < 0) return -1;
480 if(y == x + 3)
481 status = atol(buf + x);
482 else return -1;
484 i = skipToEol(buf, y, &eol);
485 if(i < 0) return -1;
487 *status_return = status;
488 *version_return = version;
489 if(message_return) {
490 /* Netscape enterprise bug */
491 if(eol > y)
492 *message_return = internAtomN(buf + y + 1, eol - y - 1);
493 else
494 *message_return = internAtom("No message");
496 return i;
/* Parse an unsigned decimal integer starting at buf[start].
   On success stores the value in *val_return and returns the index of the
   first character after the digits.  Returns -1, leaving *val_return
   untouched, if buf[start] is not a digit or if the value does not fit in
   an int. */
static int
parseInt(const char *restrict buf, int start, int *val_return)
{
    int i = start, val = 0;

    if(buf[i] < '0' || buf[i] > '9')
        return -1;
    while(buf[i] >= '0' && buf[i] <= '9') {
        int d = buf[i] - '0';
        /* Reject instead of overflowing: signed overflow is undefined. */
        if(val > (INT_MAX - d) / 10)
            return -1;
        val = val * 10 + d;
        i++;
    }
    *val_return = val;
    return i;
}
/* Parse one header line ("name: value", possibly folded) starting at
   buf[start], and return the index of the start of the next line.
   On success the four out-parameters delimit the name and the value.
   Returned *name_start_return is -1 at end of headers (an empty line), -2
   if the line couldn't be parsed; in both cases the other out-parameters
   are left untouched.  An unparseable line is skipped up to and including
   its LF or CRLF. */
static int
parseHeaderLine(const char *restrict buf, int start,
                int *name_start_return, int *name_end_return,
                int *value_start_return, int *value_end_return)
{
    int i;
    int name_start, name_end, value_start, value_end;

    if(buf[start] == '\n') {
        *name_start_return = -1;
        return start + 1;
    }
    if(buf[start] == '\r' && buf[start + 1] == '\n') {
        *name_start_return = -1;
        return start + 2;
    }

    i = getNextToken(buf, start, &name_start, &name_end);
    if(i < 0 || buf[i] != ':')
        goto syntax;
    i++;
    while(buf[i] == ' ' || buf[i] == '\t')
        i++;

    i = getHeaderValue(buf, i, &value_start, &value_end);
    if(i < 0)
        goto syntax;

    *name_start_return = name_start;
    *name_end_return = name_end;
    *value_start_return = value_start;
    *value_end_return = value_end;
    return i;

 syntax:
    /* Resynchronise on the next line terminator. */
    i = start;
    while(1) {
        if(buf[i] == '\n') {
            i++;
            break;
        }
        if(buf[i] == '\r' && buf[i + 1] == '\n') {
            i += 2;
            break;
        }
        i++;
    }
    *name_start_return = -2;
    return i;
}
/* Look in buf[from..to) for the blank line that ends a header block, i.e.
   two consecutive line terminators (each either LF or CRLF).
   On success returns the index of the first of the two terminators and
   stores the index just past the second one in *body_return.
   Returns -1, leaving *body_return untouched, if no blank line is found.
   Index 0 doubles as the "no terminator seen yet" marker, so a terminator
   at index 0 is not remembered. */
int
findEndOfHeaders(const char *restrict buf, int from, int to, int *body_return)
{
    int i = from;
    int eol = 0;
    while(i < to) {
        if(buf[i] == '\n') {
            if(eol) {
                *body_return = i + 1;
                return eol;
            }
            eol = i;
            i++;
        } else if(buf[i] == '\r') {
            if(i < to - 1 && buf[i + 1] == '\n') {
                if(eol) {
                    /* Same contract as the bare-LF case above. */
                    *body_return = i + 2;
                    return eol;
                }
                eol = i;
                i += 2;
            } else {
                eol = 0;
                i++;
            }
        } else {
            eol = 0;
            i++;
        }
    }
    return -1;
}
/* Parse a Content-Range value of the form
       bytes FROM-TO/LENGTH,  bytes FROM-TO/<star>,  bytes <star>/LENGTH
   (where <star> is a literal asterisk) starting at buf[i] and running to
   the end of the line.
   *from_return receives FROM (0 for an unspecified range), *to_return
   receives TO + 1, i.e. an exclusive upper bound (-1 for an unspecified
   range), and *full_len_return receives LENGTH (-1 when it is given as an
   asterisk).
   Returns the index just past the parsed value, or -1 on a syntax error,
   in which case the out-parameters are left untouched. */
static int
parseContentRange(const char *restrict buf, int i,
                  int *from_return, int *to_return, int *full_len_return)
{
    int j;
    int from, to, full_len;

    i = skipWhitespace(buf, i);
    if(i < 0) return -1;
    if(!token_compare(buf, i, i + 5, "bytes"))
        return -1;
    i += 5;
    i = skipWhitespace(buf, i);
    /* skipWhitespace can fail here too; don't index with -1. */
    if(i < 0) return -1;
    if(buf[i] == '*') {
        from = 0;
        to = -1;
        i++;
    } else {
        i = parseInt(buf, i, &from);
        if(i < 0) return -1;
        if(buf[i] != '-') return -1;
        i++;
        i = parseInt(buf, i, &to);
        if(i < 0) return -1;
        to = to + 1;
    }
    if(buf[i] != '/')
        return -1;
    i++;
    if(buf[i] == '*') {
        full_len = -1;
        /* Consume the asterisk, otherwise the end-of-line check below
           rejects every unknown-length range. */
        i++;
    } else {
        i = parseInt(buf, i, &full_len);
        if(i < 0) return -1;
    }
    j = skipEol(buf, i);
    if(j < 0)
        return -1;

    *from_return = from;
    *to_return = to;
    *full_len_return = full_len;
    return i;
}
/* Parse a Range value of the form "bytes=FROM-TO", "bytes=FROM-" or
   "bytes=-N" starting at buf[i] and running to the end of the line.
   *from_return receives FROM (0 when it is omitted); *to_return receives
   the number after the dash plus one, i.e. an exclusive upper bound, or -1
   when no number follows the dash.
   Returns the index just past the parsed value, or -1 on a syntax error,
   in which case the out-parameters are left untouched.
   NOTE(review): "bytes=-N" is therefore reported as the range [0, N+1),
   whereas HTTP defines that form as the last N bytes -- confirm whether
   callers rely on the current reading. */
static int
parseRange(const char *restrict buf, int i,
           int *from_return, int *to_return)
{
    int j;
    int from, to;

    i = skipWhitespace(buf, i);
    if(i < 0)
        return -1;
    if(!token_compare(buf, i, i + 6, "bytes="))
        return -1;
    i += 6;
    i = skipWhitespace(buf, i);
    /* skipWhitespace can fail here too; don't index with -1. */
    if(i < 0)
        return -1;
    if(buf[i] == '-') {
        from = 0;
    } else {
        i = parseInt(buf, i, &from);
        if(i < 0) return -1;
    }
    if(buf[i] != '-')
        return -1;
    i++;
    j = parseInt(buf, i, &to);
    if(j < 0)
        to = -1;
    else {
        to = to + 1;
        i = j;
    }
    j = skipEol(buf, i);
    if(j < 0) return -1;
    *from_return = from;
    *to_return = to;
    return i;
}
/* Return 1 if the two counted URLs name the same host, 0 otherwise.
   Both URLs must be at least seven characters long and have "://" at
   offset 4 (i.e. a four-letter scheme such as "http"); the scheme itself
   is not compared.  The host part -- everything from offset 7 up to the
   first '/' or the end of the URL, so including any port -- is compared
   with bit 0x20 forced on in every character, which makes ASCII letters
   compare case-insensitively. */
static int
urlSameHost(const char *url1, int len1, const char *url2, int len2)
{
    int i;
    if(len1 < 7 || len2 < 7)
        return 0;
    if(memcmp(url1 + 4, "://", 3) != 0 || memcmp(url2 + 4, "://", 3) != 0)
        return 0;

    i = 7;
    while(i < len1 && i < len2 && url1[i] != '/' && url2[i] != '/') {
        if((url1[i] | 0x20) != (url2[i] | 0x20))
            break;
        i++;
    }

    /* Equal only if both host parts ended at the same position. */
    if((i == len1 || url1[i] == '/') && ((i == len2 || url2[i] == '/')))
        return 1;
    return 0;
}
/* Double the capacity of a header buffer.
   hbuf is the current buffer of *size bytes; hbuf_small is the caller's
   fixed buffer, which must never be passed to realloc or free.  If hbuf is
   that fixed buffer, a new heap buffer is allocated and the old contents
   are copied into it; otherwise hbuf is grown with realloc.
   On success updates *size and returns the new buffer.  On failure (size
   not positive, doubling would overflow an int, or out of memory) frees
   hbuf if it was heap-allocated, sets *size to 0 and returns NULL. */
static char *
resize_hbuf(char *hbuf, int *size, char *hbuf_small)
{
    int new_size;
    char *new_hbuf;

    /* Test before doubling: signed overflow is undefined behaviour. */
    if(*size <= 0 || *size > INT_MAX / 2)
        goto fail;
    new_size = 2 * *size;

    if(hbuf == hbuf_small) {
        new_hbuf = malloc(new_size);
        if(new_hbuf == NULL) goto fail;
        memcpy(new_hbuf, hbuf, *size);
    } else {
        new_hbuf = realloc(hbuf, new_size);
        if(new_hbuf == NULL) goto fail;
    }
    *size = new_size;
    return new_hbuf;

 fail:
    if(hbuf != hbuf_small)
        free(hbuf);
    *size = 0;
    return NULL;
}
730 httpParseHeaders(int client, AtomPtr url,
731 const char *buf, int start, HTTPRequestPtr request,
732 AtomPtr *headers_return,
733 int *len_return, CacheControlPtr cache_control_return,
734 HTTPConditionPtr *condition_return, int *te_return,
735 time_t *date_return, time_t *last_modified_return,
736 time_t *expires_return, time_t *polipo_age_return,
737 time_t *polipo_access_return, int *polipo_body_offset_return,
738 int *age_return, char **etag_return, AtomPtr *expect_return,
739 HTTPRangePtr range_return, HTTPRangePtr content_range_return,
740 char **location_return, AtomPtr *via_return,
741 AtomPtr *auth_return)
743 int local = url ? urlIsLocal(url->string, url->length) : 0;
744 char hbuf_small[512];
745 char *hbuf = hbuf_small;
746 int hbuf_size = 512, hbuf_length = 0;
747 int i, j,
748 name_start, name_end, value_start, value_end,
749 token_start, token_end, end;
750 AtomPtr name = NULL;
751 time_t date = -1, last_modified = -1, expires = -1, polipo_age = -1,
752 polipo_access = -1, polipo_body_offset = -1;
753 int len = -1;
754 CacheControlRec cache_control;
755 char *endptr;
756 int te = TE_IDENTITY;
757 int age = -1;
758 char *etag = NULL, *ifrange = NULL;
759 int persistent = (!request || (request->connection->version != HTTP_10));
760 char *location = NULL;
761 AtomPtr via = NULL;
762 AtomPtr auth = NULL;
763 AtomPtr expect = NULL;
764 HTTPConditionPtr condition;
765 time_t ims = -1, inms = -1;
766 char *im = NULL, *inm = NULL;
767 AtomListPtr hopToHop = NULL;
768 HTTPRangeRec range = {-1, -1, -1}, content_range = {-1, -1, -1};
769 int haveCacheControl = 0;
771 #define RESIZE_HBUF() \
772 do { \
773 hbuf = resize_hbuf(hbuf, &hbuf_size, hbuf_small); \
774 if(hbuf == NULL) \
775 goto fail; \
776 } while(0)
778 cache_control.flags = 0;
779 cache_control.max_age = -1;
780 cache_control.s_maxage = -1;
781 cache_control.min_fresh = -1;
782 cache_control.max_stale = -1;
784 i = start;
786 while(1) {
787 i = parseHeaderLine(buf, i,
788 &name_start, &name_end, &value_start, &value_end);
789 if(i < 0) {
790 do_log(L_ERROR, "Couldn't find end of header line.\n");
791 goto fail;
794 if(name_start == -1)
795 break;
797 if(name_start < 0)
798 continue;
800 name = internAtomLowerN(buf + name_start, name_end - name_start);
802 if(name == atomConnection) {
803 j = getNextTokenInList(buf, value_start,
804 &token_start, &token_end, NULL, NULL,
805 &end);
806 while(1) {
807 if(j < 0) {
808 do_log(L_ERROR, "Couldn't parse Connection: ");
809 do_log_n(L_ERROR, buf + value_start,
810 value_end - value_start);
811 do_log(L_ERROR, ".\n");
812 goto fail;
814 if(token_compare(buf, token_start, token_end, "close")) {
815 persistent = 0;
816 } else if(token_compare(buf, token_start, token_end,
817 "keep-alive")) {
818 persistent = 1;
819 } else {
820 if(hopToHop == NULL)
821 hopToHop = makeAtomList(NULL, 0);
822 if(hopToHop == NULL) {
823 do_log(L_ERROR, "Couldn't allocate atom list.\n");
824 goto fail;
826 atomListCons(internAtomLowerN(buf + token_start,
827 token_end - token_start),
828 hopToHop);
830 if(end)
831 break;
832 j = getNextTokenInList(buf, j,
833 &token_start, &token_end, NULL, NULL,
834 &end);
836 } else if(name == atomCacheControl)
837 haveCacheControl = 1;
839 releaseAtom(name);
840 name = NULL;
843 i = start;
845 while(1) {
846 i = parseHeaderLine(buf, i,
847 &name_start, &name_end, &value_start, &value_end);
848 if(i < 0) {
849 do_log(L_ERROR, "Couldn't find end of header line.\n");
850 goto fail;
853 if(name_start == -1)
854 break;
856 if(name_start < 0) {
857 do_log(L_WARN, "Couldn't parse header line.\n");
858 if(laxHttpParser)
859 continue;
860 else
861 goto fail;
864 name = internAtomLowerN(buf + name_start, name_end - name_start);
866 if(name == atomProxyConnection) {
867 j = getNextTokenInList(buf, value_start,
868 &token_start, &token_end, NULL, NULL,
869 &end);
870 while(1) {
871 if(j < 0) {
872 do_log(L_WARN, "Couldn't parse Proxy-Connection:");
873 do_log_n(L_WARN, buf + value_start,
874 value_end - value_start);
875 do_log(L_WARN, ".\n");
876 persistent = 0;
877 break;
879 if(token_compare(buf, token_start, token_end, "close")) {
880 persistent = 0;
881 } else if(token_compare(buf, token_start, token_end,
882 "keep-alive")) {
883 persistent = 1;
885 if(end)
886 break;
887 j = getNextTokenInList(buf, j,
888 &token_start, &token_end, NULL, NULL,
889 &end);
891 } else if(name == atomContentLength) {
892 j = skipWhitespace(buf, value_start);
893 if(j < 0) {
894 do_log(L_WARN, "Couldn't parse Content-Length: \n");
895 do_log_n(L_WARN, buf + value_start, value_end - value_start);
896 do_log(L_WARN, ".\n");
897 len = -1;
898 } else {
899 len = strtol(buf + value_start, &endptr, 10);
900 if(endptr <= buf + value_start) {
901 do_log(L_WARN, "Couldn't parse Content-Length: \n");
902 do_log_n(L_WARN, buf + value_start,
903 value_end - value_start);
904 do_log(L_WARN, ".\n");
905 len = -1;
908 } else if((!local && name == atomProxyAuthorization) ||
909 (local && name == atomAuthorization)) {
910 if(auth_return) {
911 auth = internAtomN(buf + value_start, value_end - value_start);
912 if(auth == NULL) {
913 do_log(L_ERROR, "Couldn't allocate authorization.\n");
914 goto fail;
917 } else if(name == atomReferer) {
918 int h;
919 if(censorReferer == 0 ||
920 (censorReferer == 1 && url != NULL &&
921 urlSameHost(url->string, url->length,
922 buf + value_start, value_end - value_start))) {
923 while(hbuf_length > hbuf_size - 2)
924 RESIZE_HBUF();
925 hbuf[hbuf_length++] = '\r';
926 hbuf[hbuf_length++] = '\n';
927 do {
928 h = snnprint_n(hbuf, hbuf_length, hbuf_size,
929 buf + name_start, value_end - name_start);
930 if(h < 0) RESIZE_HBUF();
931 } while(h < 0);
932 hbuf_length = h;
934 } else if(name == atomTrailers || name == atomUpgrade) {
935 do_log(L_ERROR, "Trailers or upgrade present.\n");
936 goto fail;
937 } else if(name == atomDate || name == atomExpires ||
938 name == atomIfModifiedSince ||
939 name == atomIfUnmodifiedSince ||
940 name == atomLastModified ||
941 name == atomXPolipoDate || name == atomXPolipoAccess) {
942 time_t t;
943 j = parse_time(buf, value_start, value_end, &t);
944 if(j < 0) {
945 if(name != atomExpires) {
946 do_log(L_WARN, "Couldn't parse %s: ", name->string);
947 do_log_n(L_WARN, buf + value_start,
948 value_end - value_start);
949 do_log(L_WARN, "\n");
951 t = -1;
953 if(name == atomDate) {
954 if(t >= 0)
955 date = t;
956 } else if(name == atomExpires) {
957 if(t >= 0)
958 expires = t;
959 else
960 expires = 0;
961 } else if(name == atomLastModified)
962 last_modified = t;
963 else if(name == atomIfModifiedSince)
964 ims = t;
965 else if(name == atomIfUnmodifiedSince)
966 inms = t;
967 else if(name == atomXPolipoDate)
968 polipo_age = t;
969 else if(name == atomXPolipoAccess)
970 polipo_access = t;
971 } else if(name == atomAge) {
972 j = skipWhitespace(buf, value_start);
973 if(j < 0) {
974 age = -1;
975 } else {
976 age = strtol(buf + value_start, &endptr, 10);
977 if(endptr <= buf + value_start)
978 age = -1;
980 if(age < 0) {
981 do_log(L_WARN, "Couldn't parse age: \n");
982 do_log_n(L_WARN, buf + value_start, value_end - value_start);
983 do_log(L_WARN, " -- ignored.\n");
985 } else if(name == atomXPolipoBodyOffset) {
986 j = skipWhitespace(buf, value_start);
987 if(j < 0) {
988 do_log(L_ERROR, "Couldn't parse body offset.\n");
989 goto fail;
990 } else {
991 polipo_body_offset = strtol(buf + value_start, &endptr, 10);
992 if(endptr <= buf + value_start) {
993 do_log(L_ERROR, "Couldn't parse body offset.\n");
994 goto fail;
997 } else if(name == atomTransferEncoding) {
998 if(token_compare(buf, value_start, value_end, "identity"))
999 te = TE_IDENTITY;
1000 else if(token_compare(buf, value_start, value_end, "chunked"))
1001 te = TE_CHUNKED;
1002 else
1003 te = TE_UNKNOWN;
1004 } else if(name == atomETag ||
1005 name == atomIfNoneMatch || name == atomIfMatch ||
1006 name == atomIfRange) {
1007 int x, y;
1008 int weak;
1009 char *e;
1010 j = getNextETag(buf, value_start, &x, &y, &weak);
1011 if(j < 0) {
1012 if(buf[value_start] != '\r' && buf[value_start] != '\n')
1013 do_log(L_ERROR, "Couldn't parse ETag.\n");
1014 } else if(weak) {
1015 do_log(L_WARN, "Server returned weak ETag -- ignored.\n");
1016 } else {
1017 e = strdup_n(buf + x, y - x);
1018 if(e == NULL) goto fail;
1019 if(name == atomETag) {
1020 if(!etag)
1021 etag = e;
1022 else
1023 free(e);
1024 } else if(name == atomIfNoneMatch) {
1025 if(!inm)
1026 inm = e;
1027 else
1028 free(e);
1029 } else if(name == atomIfMatch) {
1030 if(!im)
1031 im = e;
1032 else
1033 free(e);
1034 } else if(name == atomIfRange) {
1035 if(!ifrange)
1036 ifrange = e;
1037 else
1038 free(e);
1039 } else {
1040 abort();
1043 } else if(name == atomCacheControl) {
1044 int v_start, v_end;
1045 j = getNextTokenInList(buf, value_start,
1046 &token_start, &token_end,
1047 &v_start, &v_end,
1048 &end);
1049 while(1) {
1050 if(j < 0) {
1051 do_log(L_WARN, "Couldn't parse Cache-Control.\n");
1052 cache_control.flags |= CACHE_NO;
1053 break;
1055 if(token_compare(buf, token_start, token_end, "no-cache")) {
1056 cache_control.flags |= CACHE_NO;
1057 } else if(token_compare(buf, token_start, token_end,
1058 "public")) {
1059 cache_control.flags |= CACHE_PUBLIC;
1060 } else if(token_compare(buf, token_start, token_end,
1061 "private")) {
1062 cache_control.flags |= CACHE_PRIVATE;
1063 } else if(token_compare(buf, token_start, token_end,
1064 "no-store")) {
1065 cache_control.flags |= CACHE_NO_STORE;
1066 } else if(token_compare(buf, token_start, token_end,
1067 "no-transform")) {
1068 cache_control.flags |= CACHE_NO_TRANSFORM;
1069 } else if(token_compare(buf, token_start, token_end,
1070 "must-revalidate") ||
1071 token_compare(buf, token_start, token_end,
1072 "must-validate")) { /* losers */
1073 cache_control.flags |= CACHE_MUST_REVALIDATE;
1074 } else if(token_compare(buf, token_start, token_end,
1075 "proxy-revalidate")) {
1076 cache_control.flags |= CACHE_PROXY_REVALIDATE;
1077 } else if(token_compare(buf, token_start, token_end,
1078 "only-if-cached")) {
1079 cache_control.flags |= CACHE_ONLY_IF_CACHED;
1080 } else if(token_compare(buf, token_start, token_end,
1081 "max-age") ||
1082 token_compare(buf, token_start, token_end,
1083 "maxage")) { /* losers */
1084 int a;
1085 if(v_start <= 0 || !digit(buf[v_start])) {
1086 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1087 do_log_n(L_WARN, buf + token_start,
1088 (v_end >= 0 ? v_end : token_end) -
1089 token_start);
1090 do_log(L_WARN, "\n");
1092 a = atoi(buf + v_start);
1093 cache_control.max_age = a;
1094 } else if(token_compare(buf, token_start, token_end,
1095 "s-maxage")) {
1096 int a;
1097 if(v_start <= 0 || !digit(buf[v_start])) {
1098 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1099 do_log_n(L_WARN, buf + token_start,
1100 (v_end >= 0 ? v_end : token_end) -
1101 token_start);
1102 do_log(L_WARN, "\n");
1104 a = atoi(buf + v_start);
1105 cache_control.max_age = a;
1106 } else if(token_compare(buf, token_start, token_end,
1107 "min-fresh")) {
1108 int a;
1109 if(v_start <= 0 || !digit(buf[v_start])) {
1110 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1111 do_log_n(L_WARN, buf + token_start,
1112 (v_end >= 0 ? v_end : token_end) -
1113 token_start);
1114 do_log(L_WARN, "\n");
1116 a = atoi(buf + v_start);
1117 cache_control.max_age = a;
1118 } else if(token_compare(buf, token_start, token_end,
1119 "max-stale")) {
1120 int a;
1121 if(v_start <= 0 || !digit(buf[v_start])) {
1122 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1123 do_log_n(L_WARN, buf + token_start,
1124 (v_end >= 0 ? v_end : token_end) -
1125 token_start);
1126 do_log(L_WARN, "\n");
1128 a = atoi(buf + v_start);
1129 cache_control.max_stale = a;
1130 } else {
1131 do_log(L_WARN, "Unsupported Cache-Control directive ");
1132 do_log_n(L_WARN, buf + token_start,
1133 (v_end >= 0 ? v_end : token_end) - token_start);
1134 do_log(L_WARN, " -- ignored.\n");
1136 if(end)
1137 break;
1138 j = getNextTokenInList(buf, j,
1139 &token_start, &token_end,
1140 &v_start, &v_end,
1141 &end);
1143 } else if(name == atomContentRange) {
1144 if(!client) {
1145 j = parseContentRange(buf, value_start,
1146 &content_range.from, &content_range.to,
1147 &content_range.full_length);
1148 if(j < 0) {
1149 do_log(L_ERROR, "Couldn't parse Content-Range: ");
1150 do_log_n(L_ERROR, buf + value_start,
1151 value_end - value_start);
1152 do_log(L_ERROR, "\n");
1153 goto fail;
1155 } else {
1156 do_log(L_ERROR, "Content-Range from client.\n");
1157 goto fail;
1159 } else if(name == atomRange) {
1160 if(client) {
1161 j = parseRange(buf, value_start, &range.from, &range.to);
1162 if(j < 0) {
1163 do_log(L_WARN, "Couldn't parse Range -- ignored.\n");
1164 range.from = -1;
1165 range.to = -1;
1167 } else {
1168 do_log(L_WARN, "Range from server -- ignored\n");
1170 } else if(name == atomXPolipoLocation) {
1171 if(location_return) {
1172 location =
1173 strdup_n(buf + value_start, value_end - value_start);
1174 if(location == NULL) {
1175 do_log(L_ERROR, "Couldn't allocate location.\n");
1176 goto fail;
1179 } else if(name == atomVia) {
1180 if(via_return) {
1181 via = internAtomN(buf + value_start, value_end - value_start);
1182 if(via == NULL) {
1183 do_log(L_ERROR, "Couldn't allocate via.\n");
1184 goto fail;
1187 } else if(name == atomExpect) {
1188 if(expect_return) {
1189 expect = internAtomLowerN(buf + value_start,
1190 value_end - value_start);
1191 if(expect == NULL) {
1192 do_log(L_ERROR, "Couldn't allocate expect.\n");
1193 goto fail;
1196 } else {
1197 if(!client && name == atomContentType) {
1198 if(token_compare(buf, value_start, value_end,
1199 "multipart/byteranges")) {
1200 do_log(L_ERROR,
1201 "Server returned multipart/byteranges -- yuck!\n");
1202 goto fail;
1205 if(name == atomVary) {
1206 if(!token_compare(buf, value_start, value_end, "host") &&
1207 !token_compare(buf, value_start, value_end, "*")) {
1208 /* What other vary headers should be ignored? */
1209 do_log(L_VARY, "Vary header present (");
1210 do_log_n(L_VARY,
1211 buf + value_start, value_end - value_start);
1212 do_log(L_VARY, ").\n");
1213 cache_control.flags |= CACHE_VARY;
1215 } else if(name == atomAuthorization) {
1216 cache_control.flags |= CACHE_AUTHORIZATION;
1219 if(name == atomPragma) {
1220 /* Pragma is only defined for the client, and the only
1221 standard value is no-cache (RFC 1945, 10.12).
1222 However, we honour a Pragma: no-cache for both the client
1223 and the server when there's no Cache-Control header. In
1224 all cases, we pass the Pragma header to the next hop. */
1225 if(!haveCacheControl) {
1226 j = getNextTokenInList(buf, value_start,
1227 &token_start, &token_end, NULL, NULL,
1228 &end);
1229 while(1) {
1230 if(j < 0) {
1231 do_log(L_WARN, "Couldn't parse Pragma.\n");
1232 cache_control.flags |= CACHE_NO;
1233 break;
1235 if(token_compare(buf, token_start, token_end,
1236 "no-cache"))
1237 cache_control.flags = CACHE_NO;
1238 if(end)
1239 break;
1240 j = getNextTokenInList(buf, j, &token_start, &token_end,
1241 NULL, NULL, &end);
1245 if(!client &&
1246 (name == atomSetCookie ||
1247 name == atomCookie || name == atomCookie2))
1248 cache_control.flags |= CACHE_COOKIE;
1250 if(hbuf) {
1251 if(name != atomConnection && name != atomHost &&
1252 name != atomAcceptRange && name != atomTE &&
1253 name != atomProxyAuthenticate &&
1254 name != atomKeepAlive &&
1255 (!hopToHop || !atomListMember(name, hopToHop)) &&
1256 !atomListMember(name, censoredHeaders)) {
1257 int h;
1258 while(hbuf_length > hbuf_size - 2)
1259 RESIZE_HBUF();
1260 hbuf[hbuf_length++] = '\r';
1261 hbuf[hbuf_length++] = '\n';
1262 do {
1263 h = snnprint_n(hbuf, hbuf_length, hbuf_size,
1264 buf + name_start,
1265 value_end - name_start);
1266 if(h < 0) RESIZE_HBUF();
1267 } while(h < 0);
1268 hbuf_length = h;
1272 releaseAtom(name);
1273 name = NULL;
1276 if(headers_return) {
1277 AtomPtr pheaders = NULL;
1278 pheaders = internAtomN(hbuf, hbuf_length);
1279 if(!pheaders)
1280 goto fail;
1281 *headers_return = pheaders;
1283 if(hbuf != hbuf_small)
1284 free(hbuf);
1285 hbuf = NULL;
1286 hbuf_size = 0;
1288 if(request)
1289 if(!persistent)
1290 request->flags &= ~REQUEST_PERSISTENT;
1292 if(te != TE_IDENTITY) len = -1;
1293 if(len_return) *len_return = len;
1294 if(cache_control_return) *cache_control_return = cache_control;
1295 if(condition_return) {
1296 if(ims >= 0 || inms >= 0 || im || inm || ifrange) {
1297 condition = httpMakeCondition();
1298 if(condition) {
1299 condition->ims = ims;
1300 condition->inms = inms;
1301 condition->im = im;
1302 condition->inm = inm;
1303 condition->ifrange = ifrange;
1304 } else {
1305 do_log(L_ERROR, "Couldn't allocate condition.\n");
1306 if(im) free(im);
1307 if(inm) free(inm);
1309 } else {
1310 condition = NULL;
1312 *condition_return = condition;
1313 } else {
1314 assert(!im && !inm);
1317 if(te_return) *te_return = te;
1318 if(date_return) *date_return = date;
1319 if(last_modified_return) *last_modified_return = last_modified;
1320 if(expires_return) *expires_return = expires;
1321 if(polipo_age_return) *polipo_age_return = polipo_age;
1322 if(polipo_access_return) *polipo_access_return = polipo_access;
1323 if(polipo_body_offset_return)
1324 *polipo_body_offset_return = polipo_body_offset;
1325 if(age_return) *age_return = age;
1326 if(etag_return)
1327 *etag_return = etag;
1328 else {
1329 if(etag) free(etag);
1331 if(range_return) *range_return = range;
1332 if(content_range_return) *content_range_return = content_range;
1333 if(location_return) {
1334 *location_return = location;
1335 } else {
1336 if(location)
1337 free(location);
1339 if(via_return)
1340 *via_return = via;
1341 else {
1342 if(via)
1343 releaseAtom(via);
1345 if(expect_return)
1346 *expect_return = expect;
1347 else {
1348 if(expect)
1349 releaseAtom(expect);
1351 if(auth_return)
1352 *auth_return = auth;
1353 else {
1354 if(auth)
1355 releaseAtom(auth);
1357 if(hopToHop) destroyAtomList(hopToHop);
1358 return i;
1360 fail:
1361 if(hbuf && hbuf != hbuf_small) free(hbuf);
1362 if(name) releaseAtom(name);
1363 if(etag) free(etag);
1364 if(location) free(location);
1365 if(via) releaseAtom(via);
1366 if(expect) releaseAtom(expect);
1367 if(auth) releaseAtom(auth);
1368 if(hopToHop) destroyAtomList(hopToHop);
1370 return -1;
1371 #undef RESIZE_HBUF
1375 httpFindHeader(AtomPtr header, const char *headers, int hlen,
1376 int *value_begin_return, int *value_end_return)
1378 int len = header->length;
1379 int i = 0;
1381 while(i + len + 1 < hlen) {
1382 if(headers[i + len] == ':' &&
1383 lwrcmp(headers + i, header->string, len) == 0) {
1384 int j = i + len + 1, k;
1385 while(j < hlen && headers[j] == ' ')
1386 j++;
1387 k = j;
1388 while(k < hlen && headers[k] != '\n' && headers[k] != '\r')
1389 k++;
1390 *value_begin_return = j;
1391 *value_end_return = k;
1392 return 1;
1393 } else {
1394 while(i < hlen && headers[i] != '\n' && headers[i] != '\r')
1395 i++;
1396 i++;
1397 if(i < hlen && headers[i] == '\n')
1398 i++;
1401 return 0;
/* Split an absolute "http://" URL into its host, port and remainder.

   Outputs (all offsets are indices into url):
     *x_return     start of the host part (7), or -1 if url does not
                   begin with "http://" (compared through lwrcmp()).
     *y_return     one past the end of the host part, or -1 likewise.
     *port_return  the explicit port when atoi_n() accepts it, 80 when
                   the port is absent or atoi_n() fails, and -1 when url
                   is not an http URL.
     *z_return     offset where the rest of the URL begins; 0 when url
                   is not an http URL.

   A host starting with '[' is scanned as a bracketed literal (RFC 2732):
   the scan stops after the closing ']' or at the first character that is
   not ':', a letter or a digit.

   Always returns 0. */
int
parseUrl(const char *url, int len,
         int *x_return, int *y_return, int *port_return, int *z_return)
{
    int host_start = -1;
    int host_end = -1;
    int port = -1;
    int pos = 0;

    if(len >= 7 && lwrcmp(url, "http://", 7) == 0) {
        host_start = 7;

        if(host_start < len && url[host_start] == '[') {
            /* RFC 2732 */
            pos = host_start + 1;
            while(pos < len) {
                if(url[pos] == ']') {
                    pos++;
                    break;
                }
                if((url[pos] != ':') && !letter(url[pos]) && !digit(url[pos]))
                    break;
                pos++;
            }
        } else {
            pos = host_start;
            while(pos < len && url[pos] != ':' && url[pos] != '/')
                pos++;
        }
        host_end = pos;

        if(pos < len && url[pos] == ':') {
            /* atoi_n() reports failure with a negative result; in that
               case fall back to the default port and do not advance. */
            int after_port = atoi_n(url, pos + 1, len, &port);
            if(after_port >= 0)
                pos = after_port;
            else
                port = 80;
        } else {
            port = 80;
        }
    }

    *x_return = host_start;
    *y_return = host_end;
    *port_return = port;
    *z_return = pos;
    return 0;
}
/* Return 1 if the url (len bytes, not necessarily NUL-terminated) is
   non-empty and begins with '/', 0 otherwise. */
int
urlIsLocal(const char *url, int len)
{
    if(len <= 0)
        return 0;
    return url[0] == '/';
}
/* Return 1 if the url (len bytes) begins with the exact, case-sensitive
   prefix "/polipo/", 0 otherwise. */
int
urlIsSpecial(const char *url, int len)
{
    static const char prefix[] = "/polipo/";
    const int prefix_len = (int)(sizeof(prefix) - 1);

    if(len < prefix_len)
        return 0;
    return memcmp(url, prefix, prefix_len) == 0;
}
1467 parseChunkSize(const char *restrict buf, int i, int end,
1468 int *chunk_size_return)
1470 int v, d;
1471 v = h2i(buf[i]);
1472 if(v < 0)
1473 return -1;
1475 i++;
1477 while(i < end) {
1478 d = h2i(buf[i]);
1479 if(d < 0)
1480 break;
1481 v = v * 16 + d;
1482 i++;
1485 while(i < end) {
1486 if(buf[i] == ' ' || buf[i] == '\t')
1487 i++;
1488 else
1489 break;
1492 if(i >= end - 1)
1493 return 0;
1495 if(buf[i] != '\r' || buf[i + 1] != '\n')
1496 return -1;
1498 i += 2;
1500 if(v == 0) {
1501 if(i >= end - 1)
1502 return 0;
1503 if(buf[i] != '\r') {
1504 do_log(L_ERROR, "Trailers present!\n");
1505 return -1;
1507 i++;
1508 if(buf[i] != '\n')
1509 return -1;
1510 i++;
1513 *chunk_size_return = v;
1514 return i;
1519 checkVia(AtomPtr name, AtomPtr via)
1521 int i;
1522 char *v;
1523 if(via == NULL || via->length == 0)
1524 return 1;
1526 v = via->string;
1528 i = 0;
1529 while(i < via->length) {
1530 while(v[i] == ' ' || v[i] == '\t' || v[i] == ',' ||
1531 v[i] == '\r' || v[i] == '\n' ||
1532 digit(v[i]) || v[i] == '.')
1533 i++;
1534 if(i + name->length > via->length)
1535 break;
1536 if(memcmp(v + i, name->string, name->length) == 0) {
1537 char c = v[i + name->length];
1538 if(c == '\0' || c == ' ' || c == '\t' || c == ',' ||
1539 c == '\r' || c == '\n')
1540 return 0;
1542 i++;
1543 while(letter(v[i]) || digit(v[i]) || v[i] == '.')
1544 i++;
1546 return 1;