Define UNALIGNED_ACCESS on AMD64.
[polipo.git] / http_parse.c
blob555efc46de7a64a1c6aa668f91bbd8bb10046150
1 /*
2 Copyright (c) 2003-2006 by Juliusz Chroboczek
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
23 #include "polipo.h"
25 static int getNextWord(const char *buf, int i, int *x_return, int *y_return);
26 static int getNextToken(const char *buf, int i, int *x_return, int *y_return);
27 static int getNextTokenInList(const char *buf, int i,
28 int *x_return, int *y_return,
29 int *z_return, int *t_return,
30 int *end_return);
32 static AtomPtr atomConnection, atomProxyConnection, atomContentLength,
33 atomHost, atomAcceptRange, atomTE,
34 atomReferer, atomProxyAuthenticate, atomProxyAuthorization,
35 atomKeepAlive, atomTrailer, atomUpgrade, atomDate, atomExpires,
36 atomIfModifiedSince, atomIfUnmodifiedSince, atomIfRange, atomLastModified,
37 atomIfMatch, atomIfNoneMatch, atomAge, atomTransferEncoding,
38 atomETag, atomCacheControl, atomPragma, atomContentRange, atomRange,
39 atomVia, atomVary, atomExpect, atomAuthorization,
40 atomSetCookie, atomCookie, atomCookie2,
41 atomXPolipoDate, atomXPolipoAccess, atomXPolipoLocation,
42 atomXPolipoBodyOffset;
44 AtomPtr atomContentType, atomContentEncoding;
46 int censorReferer = 0;
47 int laxHttpParser = 1;
49 static AtomListPtr censoredHeaders;
51 void
52 preinitHttpParser()
54 CONFIG_VARIABLE_SETTABLE(censorReferer, CONFIG_TRISTATE, configIntSetter,
55 "Censor referer headers.");
56 censoredHeaders = makeAtomList(NULL, 0);
57 if(censoredHeaders == NULL) {
58 do_log(L_ERROR, "Couldn't allocate censored atoms.\n");
59 exit(1);
61 CONFIG_VARIABLE(censoredHeaders, CONFIG_ATOM_LIST_LOWER,
62 "Headers to censor.");
63 CONFIG_VARIABLE_SETTABLE(laxHttpParser, CONFIG_BOOLEAN, configIntSetter,
64 "Ignore unknown HTTP headers.");
67 void
68 initHttpParser()
70 #define A(name, value) name = internAtom(value); if(!name) goto fail;
71 /* These must be in lower-case */
72 A(atomConnection, "connection");
73 A(atomProxyConnection, "proxy-connection");
74 A(atomContentLength, "content-length");
75 A(atomHost, "host");
76 A(atomAcceptRange, "accept-range");
77 A(atomTE, "te");
78 A(atomReferer, "referer");
79 A(atomProxyAuthenticate, "proxy-authenticate");
80 A(atomProxyAuthorization, "proxy-authorization");
81 A(atomKeepAlive, "keep-alive");
82 A(atomTrailer, "trailer");
83 A(atomUpgrade, "upgrade");
84 A(atomDate, "date");
85 A(atomExpires, "expires");
86 A(atomIfModifiedSince, "if-modified-since");
87 A(atomIfUnmodifiedSince, "if-unmodified-since");
88 A(atomIfRange, "if-range");
89 A(atomLastModified, "last-modified");
90 A(atomIfMatch, "if-match");
91 A(atomIfNoneMatch, "if-none-match");
92 A(atomAge, "age");
93 A(atomTransferEncoding, "transfer-encoding");
94 A(atomETag, "etag");
95 A(atomCacheControl, "cache-control");
96 A(atomPragma, "pragma");
97 A(atomContentRange, "content-range");
98 A(atomRange, "range");
99 A(atomVia, "via");
100 A(atomContentType, "content-type");
101 A(atomContentEncoding, "content-encoding");
102 A(atomVary, "vary");
103 A(atomExpect, "expect");
104 A(atomAuthorization, "authorization");
105 A(atomSetCookie, "set-cookie");
106 A(atomCookie, "cookie");
107 A(atomCookie2, "cookie2");
108 A(atomXPolipoDate, "x-polipo-date");
109 A(atomXPolipoAccess, "x-polipo-access");
110 A(atomXPolipoLocation, "x-polipo-location");
111 A(atomXPolipoBodyOffset, "x-polipo-body-offset");
112 #undef A
113 return;
115 fail:
116 do_log(L_ERROR, "Couldn't allocate atom.\n");
117 exit(1);
/* Finds the next run of printable, non-space ASCII characters.

   Leading spaces (but not tabs) starting at buf[i] are skipped.  If the
   first character after them is CR or LF, returns -1 and leaves the
   outputs untouched.  Otherwise stores the half-open span of the word in
   *x_return / *y_return and returns 0.  The span may be empty. */
static int
getNextWord(const char *restrict buf, int i, int *x_return, int *y_return)
{
    int first, last;

    for(; buf[i] == ' '; i++)
        ;
    if(buf[i] == '\r' || buf[i] == '\n')
        return -1;

    first = i;
    for(last = first; buf[last] > 32 && buf[last] < 127; last++)
        ;

    *x_return = first;
    *y_return = last;
    return 0;
}
/* Skips a parenthesised comment.  buf[i] must be the opening '('.

   Returns the index just past the closing ')'.  A backslash immediately
   followed by ')' is skipped as a pair.  A line break inside the comment
   is only accepted when the next line starts with a space or a tab;
   any other line break (or a CR not followed by LF) yields -1.
   Nested comments are not tracked. */
static int
skipComment(const char *restrict buf, int i)
{
    assert(buf[i] == '(');

    for(i++; ; ) {
        switch(buf[i]) {
        case ')':
            return i + 1;
        case '\\':
            /* Only an escaped closing parenthesis is treated specially. */
            i += (buf[i + 1] == ')') ? 2 : 1;
            break;
        case '\n':
            if(buf[i + 1] != ' ' && buf[i + 1] != '\t')
                return -1;
            i += 2;
            break;
        case '\r':
            if(buf[i + 1] != '\n')
                return -1;
            if(buf[i + 2] != ' ' && buf[i + 2] != '\t')
                return -1;
            i += 3;
            break;
        default:
            i++;
            break;
        }
    }
}
/* Skips spaces, tabs, parenthesised comments and folded line breaks
   (a LF or CRLF followed by a space or tab) starting at buf[i].

   Returns the index of the first character that is none of those; an
   unfolded line break is not consumed.  Returns -1 if a comment could
   not be skipped. */
static int
skipWhitespace(const char *restrict buf, int i)
{
    for(;;) {
        int eol_len;

        if(buf[i] == ' ' || buf[i] == '\t') {
            i++;
            continue;
        }

        if(buf[i] == '(') {
            i = skipComment(buf, i);
            if(i < 0)
                return -1;
            continue;
        }

        /* Length of the line terminator at i, or 0 if there is none. */
        if(buf[i] == '\n')
            eol_len = 1;
        else if(buf[i] == '\r' && buf[i + 1] == '\n')
            eol_len = 2;
        else
            eol_len = 0;

        if(eol_len == 0)
            return i;
        if(buf[i + eol_len] != ' ' && buf[i + eol_len] != '\t')
            return i;

        /* Folded line: swallow the terminator and the first blank. */
        i += eol_len + 1;
    }
}
/* Finds the next HTTP token at or after buf[i].

   Spaces, tabs, parenthesised comments and folded line breaks (LF or CRLF
   followed by a space or tab) in front of the token are skipped.  The
   token ends at the first separator character or at any character
   outside the printable ASCII range.

   On success stores the half-open span of the token in *x_return and
   *y_return and returns the index just past it (equal to *y_return); the
   span may be empty.  Returns -1 if an unfolded line break is reached
   first, or if a comment contains a line break.

   All offsets are relative to the buf pointer passed in by the caller. */
static int
getNextToken(const char *restrict buf, int i, int *x_return, int *y_return)
{
    int x, y;
 again:
    while(buf[i] == ' ' || buf[i] == '\t')
        i++;
    if(buf[i] == '(') {
        i++;
        while(buf[i] != ')') {
            if(buf[i] == '\n' || buf[i] == '\r')
                return -1;
            /* Advance the index, not the base pointer: the offsets handed
               back to the caller must stay relative to the original buf. */
            if(buf[i] == '\\' && buf[i + 1] != '\n' && buf[i + 1] != '\r')
                i += 2;
            else
                i++;
        }
        /* Step over the closing parenthesis before looking for the token. */
        i++;
        goto again;
    }
    if(buf[i] == '\n') {
        if(buf[i + 1] == ' ' || buf[i + 1] == '\t') {
            i += 2;
            goto again;
        } else {
            return -1;
        }
    }
    if(buf[i] == '\r') {
        if(buf[i + 1] == '\n' && (buf[i + 2] == ' ' || buf[i + 2] == '\t')) {
            i += 3;
            goto again;
        } else {
            return -1;
        }
    }
    x = i;
    while(buf[i] > 32 && buf[i] < 127) {
        switch(buf[i]) {
        case '(': case ')': case '<': case '>': case '@':
        case ',': case ';': case ':': case '\\': case '/':
        case '[': case ']': case '?': case '=':
        case '{': case '}': case ' ': case '\t':
            goto out;
        default:
            i++;
        }
    }
 out:
    y = i;

    *x_return = x;
    *y_return = y;

    return y;
}
/* Parses an entity tag of the form ["W/"] '"' ... '"' at buf[i], after
   skipping leading spaces and tabs.

   On success stores the half-open span of the text between the quotes in
   *x_return / *y_return, sets *weak_return to 1 if the "W/" prefix was
   present and 0 otherwise, and returns the index just past the closing
   quote.  Returns -1 if no opening quote is found or if the line ends
   before the closing quote. */
static int
getNextETag(const char * restrict buf, int i,
            int *x_return, int *y_return, int *weak_return)
{
    int weak = 0;
    int x, y;
    while(buf[i] == ' ' || buf[i] == '\t')
        i++;
    if(buf[i] == 'W' && buf[i + 1] == '/') {
        weak = 1;
        i += 2;
    }
    if(buf[i] == '"')
        i++;
    else
        return -1;

    x = i;
    while(buf[i] != '"') {
        /* An unterminated tag must not be scanned past the end of the
           header line. */
        if(buf[i] == '\r' || buf[i] == '\n')
            return -1;
        i++;
    }
    y = i;
    i++;

    *x_return = x;
    *y_return = y;
    *weak_return = weak;
    return i;
}
/* Parses one element of a comma-separated list of "token" or
   "token = value" items, starting at buf[i].

   *x_return / *y_return receive the span of the token.  When z_return and
   t_return are non-NULL they receive the span of the value: its start and
   its end with trailing spaces and tabs trimmed, or -1 for both when the
   element has no "=value" part.  *end_return is set to 1 when the element
   is followed by an unfolded line break (last element of the header) and
   to 0 when it is followed by a comma or by a folded line.

   Returns the index at which the next element starts, or -1 on a syntax
   error (no token, a bare CR, or an unexpected character after the
   element). */
static int
getNextTokenInList(const char *restrict buf, int i,
                   int *x_return, int *y_return,
                   int *z_return, int *t_return,
                   int *end_return)
{
    int j, x, y, z = -1, t = -1, end;
    j = getNextToken(buf, i, &x, &y);
    if(j < 0)
        return -1;
    while(buf[j] == ' ' || buf[j] == '\t')
        j++;

    if(buf[j] == '=') {
        j++;
        while(buf[j] == ' ' || buf[j] == '\t')
            j++;
        z = j;
        while(buf[j] != ',' && buf[j] != '\n' && buf[j] != '\r')
            j++;
        /* Report where the value ends, without trailing blanks, so that
           callers get a usable [z, t) span. */
        t = j;
        while(t > z && (buf[t - 1] == ' ' || buf[t - 1] == '\t'))
            t--;
    }

    if(buf[j] == '\n' || buf[j] == '\r') {
        if(buf[j] == '\r') {
            if(buf[j + 1] != '\n')
                return -1;
            j += 2;
        } else
            j++;
        end = 1;
        /* A leading blank on the next line means the header continues. */
        if(buf[j] == ' ' || buf[j] == '\t') {
            while(buf[j] == ' ' || buf[j] == '\t')
                j++;
            end = 0;
        }
    } else if(buf[j] == ',') {
        j++;
        while(buf[j] == ' ' || buf[j] == '\t')
            j++;
        end = 0;
    } else {
        return -1;
    }

    *x_return = x;
    *y_return = y;
    if(z_return)
        *z_return = z;
    if(t_return)
        *t_return = t;
    *end_return = end;
    return j;
}
/* Returns 1 when strcasecmp_n() reports that s equals the span
   buf[start..end), and 0 otherwise.  (Presumably a case-insensitive
   comparison -- strcasecmp_n is defined elsewhere.) */
static inline int
token_compare(const char *buf, int start, int end, const char *s)
{
    const char *span = buf + start;
    int span_len = end - start;

    if(strcasecmp_n(s, span, span_len) != 0)
        return 0;
    return 1;
}
/* Skips optional spaces followed by a line terminator (LF or CRLF).
   Returns the index of the first character of the next line, or -1 if
   anything other than spaces precedes the terminator or a CR is not
   followed by LF. */
static int
skipEol(const char *restrict buf, int i)
{
    for(; buf[i] == ' '; i++)
        ;

    switch(buf[i]) {
    case '\n':
        return i + 1;
    case '\r':
        return (buf[i + 1] == '\n') ? i + 2 : -1;
    default:
        return -1;
    }
}
/* Advances from buf[i] to the next line terminator (LF or CRLF).
   On success stores the index of the terminator's first character in
   *start_return and returns the index just past the terminator.
   Returns -1, leaving *start_return untouched, when a CR is not followed
   by LF. */
static int
skipToEol(const char *restrict buf, int i, int *start_return)
{
    int eol_len;

    while(buf[i] != '\n' && buf[i] != '\r')
        i++;

    if(buf[i] == '\n')
        eol_len = 1;
    else if(buf[i + 1] == '\n')
        eol_len = 2;            /* buf[i] is '\r' here */
    else
        return -1;

    *start_return = i;
    return i + eol_len;
}
/* Delimits a header value, following folded continuation lines.

   Leading spaces and tabs at buf[start] are skipped.  The value then
   extends to the first line terminator whose following line does not
   begin with a space or tab.  Stores the value's start and the index of
   that terminator in *value_start_return / *value_end_return and returns
   the index of the line after the value.  Returns -1 if skipToEol()
   fails. */
static int
getHeaderValue(const char *restrict buf, int start,
               int *value_start_return, int *value_end_return)
{
    int scan, next, eol;

    while(buf[start] == ' ' || buf[start] == '\t')
        start++;

    scan = start;
    do {
        next = skipToEol(buf, scan, &eol);
        if(next < 0)
            return -1;
        /* Only used when the line at next is a continuation. */
        scan = next + 1;
    } while(buf[next] == ' ' || buf[next] == '\t');

    *value_start_return = start;
    *value_end_return = eol;
    return next;
}
395 httpParseClientFirstLine(const char *restrict buf, int offset,
396 int *method_return,
397 AtomPtr *url_return,
398 int *version_return)
400 int i = 0;
401 int x, y;
402 int method;
403 AtomPtr url;
404 int version = HTTP_UNKNOWN;
405 int eol;
407 i = offset;
408 i = getNextWord(buf, i, &x, &y);
409 if(i < 0) return -1;
410 if(y == x + 3 && memcmp(buf + x, "GET", 3) == 0)
411 method = METHOD_GET;
412 else if(y == x + 4 && memcmp(buf + x, "HEAD", 4) == 0)
413 method = METHOD_HEAD;
414 else if(y == x + 4 && memcmp(buf + x, "POST", 4) == 0)
415 method = METHOD_POST;
416 else if(y == x + 3 && memcmp(buf + x, "PUT", 3) == 0)
417 method = METHOD_PUT;
418 else if(y == x + 7 && memcmp(buf + x, "CONNECT", 7) == 0)
419 method = METHOD_CONNECT;
420 else
421 method = METHOD_UNKNOWN;
423 i = getNextWord(buf, y + 1, &x, &y);
424 if(i < 0) return -1;
426 url = internAtomN(buf + x, y - x);
428 i = getNextWord(buf, y + 1, &x, &y);
429 if(i < 0) {
430 releaseAtom(url);
431 return -1;
434 if(y == x + 8) {
435 if(memcmp(buf + x, "HTTP/1.", 7) != 0)
436 version = HTTP_UNKNOWN;
437 else if(buf[x + 7] == '0')
438 version = HTTP_10;
439 else if(buf[x + 7] >= '1' && buf[x + 7] <= '9')
440 version = HTTP_11;
441 else
442 version = HTTP_UNKNOWN;
445 eol = skipEol(buf, y);
446 if(eol < 0) return -1;
448 *method_return = method;
449 if(url_return)
450 *url_return = url;
451 else
452 releaseAtom(url);
453 *version_return = version;
454 return eol;
458 httpParseServerFirstLine(const char *restrict buf,
459 int *status_return,
460 int *version_return,
461 AtomPtr *message_return)
463 int i = 0;
464 int x, y, eol;
465 int status;
466 int version = HTTP_UNKNOWN;
468 i = getNextWord(buf, 0, &x, &y);
469 if(i < 0)
470 return -1;
471 if(y == x + 8 && memcmp(buf + x, "HTTP/1.0", 8) == 0)
472 version = HTTP_10;
473 else if(y >= x + 8 && memcmp(buf + x, "HTTP/1.", 7) == 0)
474 version = HTTP_11;
475 else
476 version = HTTP_UNKNOWN;
478 i = getNextWord(buf, y + 1, &x, &y);
479 if(i < 0) return -1;
480 if(y == x + 3)
481 status = atol(buf + x);
482 else return -1;
484 i = skipToEol(buf, y, &eol);
485 if(i < 0) return -1;
487 *status_return = status;
488 *version_return = version;
489 if(message_return) {
490 /* Netscape enterprise bug */
491 if(eol > y)
492 *message_return = internAtomN(buf + y + 1, eol - y - 1);
493 else
494 *message_return = internAtom("No message");
496 return i;
/* Parses an unsigned decimal number at buf[start].
   Stores the value in *val_return and returns the index of the first
   non-digit character, or returns -1 (leaving *val_return untouched) if
   buf[start] is not a digit.  No overflow check is made. */
static int
parseInt(const char *restrict buf, int start, int *val_return)
{
    int pos;
    int acc = 0;

    if(!digit(buf[start]))
        return -1;

    for(pos = start; digit(buf[pos]); pos++)
        acc = acc * 10 + (buf[pos] - '0');

    *val_return = acc;
    return pos;
}
/* Splits one header line, starting at buf[start], into name and value
   spans and returns the index of the following line.

   *name_start_return is set to -1 when the line is empty (end of the
   headers) and to -2 when the line couldn't be parsed; in both cases the
   other outputs are left untouched.  An unparsable line is skipped up to
   and including its first line terminator. */
static int
parseHeaderLine(const char *restrict buf, int start,
                int *name_start_return, int *name_end_return,
                int *value_start_return, int *value_end_return)
{
    int pos;
    int n_start, n_end, v_start, v_end;

    /* An empty line terminates the header block. */
    if(buf[start] == '\n') {
        *name_start_return = -1;
        return start + 1;
    }
    if(buf[start] == '\r' && buf[start + 1] == '\n') {
        *name_start_return = -1;
        return start + 2;
    }

    pos = getNextToken(buf, start, &n_start, &n_end);
    if(pos >= 0 && buf[pos] == ':') {
        pos++;
        while(buf[pos] == ' ' || buf[pos] == '\t')
            pos++;

        pos = getHeaderValue(buf, pos, &v_start, &v_end);
        if(pos >= 0) {
            *name_start_return = n_start;
            *name_end_return = n_end;
            *value_start_return = v_start;
            *value_end_return = v_end;
            return pos;
        }
    }

    /* Syntax error: resynchronise on the next line terminator. */
    for(pos = start; ; pos++) {
        if(buf[pos] == '\n') {
            pos += 1;
            break;
        }
        if(buf[pos] == '\r' && buf[pos + 1] == '\n') {
            pos += 2;
            break;
        }
    }
    *name_start_return = -2;
    return pos;
}
/* Looks for the empty line that terminates a header block in
   buf[from..to).

   Two consecutive line terminators (each either LF or CRLF) mark the end
   of the headers.  On success returns the index of the first of the two
   terminators and stores the index of the first byte after the second one
   in *body_return.  Returns -1, leaving *body_return untouched, if no
   empty line is found before `to'.

   Index 0 doubles as the "no terminator seen yet" marker, so a terminator
   located at offset 0 is not taken into account. */
int
findEndOfHeaders(const char *restrict buf, int from, int to, int *body_return)
{
    int i = from;
    int eol = 0;
    while(i < to) {
        if(buf[i] == '\n') {
            if(eol) {
                *body_return = i + 1;
                return eol;
            }
            eol = i;
            i++;
        } else if(buf[i] == '\r') {
            if(i < to - 1 && buf[i + 1] == '\n') {
                if(eol) {
                    /* Same contract as the bare-LF case above: the body
                       starts after this CRLF, and the previous terminator
                       is what gets returned. */
                    *body_return = i + 2;
                    return eol;
                }
                eol = i;
                i += 2;
            } else {
                eol = 0;
                i++;
            }
        } else {
            eol = 0;
            i++;
        }
    }
    return -1;
}
599 static int
600 parseContentRange(const char *restrict buf, int i,
601 int *from_return, int *to_return, int *full_len_return)
603 int j;
604 int from, to, full_len;
606 i = skipWhitespace(buf, i);
607 if(i < 0) return -1;
608 if(!token_compare(buf, i, i + 5, "bytes")) {
609 do_log(L_WARN, "Incorrect Content-Range header -- chugging along.\n");
610 } else {
611 i += 5;
613 i = skipWhitespace(buf, i);
614 if(buf[i] == '*') {
615 from = 0;
616 to = -1;
617 i++;
618 } else {
619 i = parseInt(buf, i, &from);
620 if(i < 0) return -1;
621 if(buf[i] != '-') return -1;
622 i++;
623 i = parseInt(buf, i, &to);
624 if(i < 0) return -1;
625 to = to + 1;
627 if(buf[i] != '/')
628 return -1;
629 i++;
630 if(buf[i] == '*')
631 full_len = -1;
632 else {
633 i = parseInt(buf, i, &full_len);
634 if(i < 0) return -1;
636 j = skipEol(buf, i);
637 if(j < 0)
638 return -1;
640 *from_return = from;
641 *to_return = to;
642 *full_len_return = full_len;
643 return i;
/* Parses the value of a Range header ("bytes=from-to", where either
   bound may be omitted) at buf[i].

   *from_return receives the first byte offset (0 when omitted) and
   *to_return the offset one past the last byte, or -1 when no end is
   given.

   Returns the index just past the parsed value, or -1 on a syntax error,
   in which case the outputs are left untouched. */
static int
parseRange(const char *restrict buf, int i,
           int *from_return, int *to_return)
{
    int j;
    int from, to;

    i = skipWhitespace(buf, i);
    if(i < 0)
        return -1;
    if(!token_compare(buf, i, i + 6, "bytes="))
        return -1;
    i += 6;
    i = skipWhitespace(buf, i);
    /* skipWhitespace fails on a malformed comment; don't index with -1. */
    if(i < 0)
        return -1;
    if(buf[i] == '-') {
        from = 0;
    } else {
        i = parseInt(buf, i, &from);
        if(i < 0) return -1;
    }
    if(buf[i] != '-')
        return -1;
    i++;
    j = parseInt(buf, i, &to);
    if(j < 0)
        to = -1;
    else {
        /* The header gives an inclusive end; store an exclusive one. */
        to = to + 1;
        i = j;
    }
    /* Only checks that nothing but spaces follows on the line. */
    j = skipEol(buf, i);
    if(j < 0) return -1;
    *from_return = from;
    *to_return = to;
    return i;
}
/* Returns 1 if both URLs have "://" at offset 4 (i.e. a four-letter
   scheme) and the text between offset 7 and the next '/' (or the end of
   the URL) is the same in both, and 0 otherwise.  Characters are compared
   after setting bit 0x20, which makes ASCII letters compare
   case-insensitively.  The schemes themselves are not compared. */
static int
urlSameHost(const char *url1, int len1, const char *url2, int len2)
{
    int pos;
    int done1, done2;

    if(len1 < 7 || len2 < 7)
        return 0;
    if(memcmp(url1 + 4, "://", 3) != 0)
        return 0;
    if(memcmp(url2 + 4, "://", 3) != 0)
        return 0;

    for(pos = 7; pos < len1 && pos < len2; pos++) {
        if(url1[pos] == '/' || url2[pos] == '/')
            break;
        if((url1[pos] | 0x20) != (url2[pos] | 0x20))
            break;
    }

    /* Both host parts must end exactly where the scan stopped. */
    done1 = (pos == len1 || url1[pos] == '/');
    done2 = (pos == len2 || url2[pos] == '/');
    return done1 && done2;
}
/* Doubles the capacity of the header buffer.

   hbuf is either the caller's fixed buffer hbuf_small or a heap block.
   In the first case a new heap block is allocated and the first *size
   bytes are copied into it; in the second the block is reallocated.
   On success *size is updated and the new buffer is returned.  On
   failure (doubling does not increase the size, or out of memory) a heap
   hbuf is freed, *size is set to 0 and NULL is returned. */
static char *
resize_hbuf(char *hbuf, int *size, char *hbuf_small)
{
    const int on_heap = (hbuf != hbuf_small);
    int doubled = 2 * *size;
    char *grown = NULL;

    if(doubled > *size) {
        if(on_heap) {
            grown = realloc(hbuf, doubled);
        } else {
            grown = malloc(doubled);
            if(grown != NULL)
                memcpy(grown, hbuf, *size);
        }
    }

    if(grown == NULL) {
        if(on_heap)
            free(hbuf);
        *size = 0;
        return NULL;
    }

    *size = doubled;
    return grown;
}
732 httpParseHeaders(int client, AtomPtr url,
733 const char *buf, int start, HTTPRequestPtr request,
734 AtomPtr *headers_return,
735 int *len_return, CacheControlPtr cache_control_return,
736 HTTPConditionPtr *condition_return, int *te_return,
737 time_t *date_return, time_t *last_modified_return,
738 time_t *expires_return, time_t *polipo_age_return,
739 time_t *polipo_access_return, int *polipo_body_offset_return,
740 int *age_return, char **etag_return, AtomPtr *expect_return,
741 HTTPRangePtr range_return, HTTPRangePtr content_range_return,
742 char **location_return, AtomPtr *via_return,
743 AtomPtr *auth_return)
745 int local = url ? urlIsLocal(url->string, url->length) : 0;
746 char hbuf_small[512];
747 char *hbuf = hbuf_small;
748 int hbuf_size = 512, hbuf_length = 0;
749 int i, j,
750 name_start, name_end, value_start, value_end,
751 token_start, token_end, end;
752 AtomPtr name = NULL;
753 time_t date = -1, last_modified = -1, expires = -1, polipo_age = -1,
754 polipo_access = -1, polipo_body_offset = -1;
755 int len = -1;
756 CacheControlRec cache_control;
757 char *endptr;
758 int te = TE_IDENTITY;
759 int age = -1;
760 char *etag = NULL, *ifrange = NULL;
761 int persistent = (!request || (request->connection->version != HTTP_10));
762 char *location = NULL;
763 AtomPtr via = NULL;
764 AtomPtr auth = NULL;
765 AtomPtr expect = NULL;
766 HTTPConditionPtr condition;
767 time_t ims = -1, inms = -1;
768 char *im = NULL, *inm = NULL;
769 AtomListPtr hopToHop = NULL;
770 HTTPRangeRec range = {-1, -1, -1}, content_range = {-1, -1, -1};
771 int haveCacheControl = 0;
773 #define RESIZE_HBUF() \
774 do { \
775 hbuf = resize_hbuf(hbuf, &hbuf_size, hbuf_small); \
776 if(hbuf == NULL) \
777 goto fail; \
778 } while(0)
780 cache_control.flags = 0;
781 cache_control.max_age = -1;
782 cache_control.s_maxage = -1;
783 cache_control.min_fresh = -1;
784 cache_control.max_stale = -1;
786 i = start;
788 while(1) {
789 i = parseHeaderLine(buf, i,
790 &name_start, &name_end, &value_start, &value_end);
791 if(i < 0) {
792 do_log(L_ERROR, "Couldn't find end of header line.\n");
793 goto fail;
796 if(name_start == -1)
797 break;
799 if(name_start < 0)
800 continue;
802 name = internAtomLowerN(buf + name_start, name_end - name_start);
804 if(name == atomConnection) {
805 j = getNextTokenInList(buf, value_start,
806 &token_start, &token_end, NULL, NULL,
807 &end);
808 while(1) {
809 if(j < 0) {
810 do_log(L_ERROR, "Couldn't parse Connection: ");
811 do_log_n(L_ERROR, buf + value_start,
812 value_end - value_start);
813 do_log(L_ERROR, ".\n");
814 goto fail;
816 if(token_compare(buf, token_start, token_end, "close")) {
817 persistent = 0;
818 } else if(token_compare(buf, token_start, token_end,
819 "keep-alive")) {
820 persistent = 1;
821 } else {
822 if(hopToHop == NULL)
823 hopToHop = makeAtomList(NULL, 0);
824 if(hopToHop == NULL) {
825 do_log(L_ERROR, "Couldn't allocate atom list.\n");
826 goto fail;
828 atomListCons(internAtomLowerN(buf + token_start,
829 token_end - token_start),
830 hopToHop);
832 if(end)
833 break;
834 j = getNextTokenInList(buf, j,
835 &token_start, &token_end, NULL, NULL,
836 &end);
838 } else if(name == atomCacheControl)
839 haveCacheControl = 1;
841 releaseAtom(name);
842 name = NULL;
845 i = start;
847 while(1) {
848 i = parseHeaderLine(buf, i,
849 &name_start, &name_end, &value_start, &value_end);
850 if(i < 0) {
851 do_log(L_ERROR, "Couldn't find end of header line.\n");
852 goto fail;
855 if(name_start == -1)
856 break;
858 if(name_start < 0) {
859 do_log(L_WARN, "Couldn't parse header line.\n");
860 if(laxHttpParser)
861 continue;
862 else
863 goto fail;
866 name = internAtomLowerN(buf + name_start, name_end - name_start);
868 if(name == atomProxyConnection) {
869 j = getNextTokenInList(buf, value_start,
870 &token_start, &token_end, NULL, NULL,
871 &end);
872 while(1) {
873 if(j < 0) {
874 do_log(L_WARN, "Couldn't parse Proxy-Connection:");
875 do_log_n(L_WARN, buf + value_start,
876 value_end - value_start);
877 do_log(L_WARN, ".\n");
878 persistent = 0;
879 break;
881 if(token_compare(buf, token_start, token_end, "close")) {
882 persistent = 0;
883 } else if(token_compare(buf, token_start, token_end,
884 "keep-alive")) {
885 persistent = 1;
887 if(end)
888 break;
889 j = getNextTokenInList(buf, j,
890 &token_start, &token_end, NULL, NULL,
891 &end);
893 } else if(name == atomContentLength) {
894 j = skipWhitespace(buf, value_start);
895 if(j < 0) {
896 do_log(L_WARN, "Couldn't parse Content-Length: \n");
897 do_log_n(L_WARN, buf + value_start, value_end - value_start);
898 do_log(L_WARN, ".\n");
899 len = -1;
900 } else {
901 len = strtol(buf + value_start, &endptr, 10);
902 if(endptr <= buf + value_start) {
903 do_log(L_WARN, "Couldn't parse Content-Length: \n");
904 do_log_n(L_WARN, buf + value_start,
905 value_end - value_start);
906 do_log(L_WARN, ".\n");
907 len = -1;
910 } else if((!local && name == atomProxyAuthorization) ||
911 (local && name == atomAuthorization)) {
912 if(auth_return) {
913 auth = internAtomN(buf + value_start, value_end - value_start);
914 if(auth == NULL) {
915 do_log(L_ERROR, "Couldn't allocate authorization.\n");
916 goto fail;
919 } else if(name == atomReferer) {
920 int h;
921 if(censorReferer == 0 ||
922 (censorReferer == 1 && url != NULL &&
923 urlSameHost(url->string, url->length,
924 buf + value_start, value_end - value_start))) {
925 while(hbuf_length > hbuf_size - 2)
926 RESIZE_HBUF();
927 hbuf[hbuf_length++] = '\r';
928 hbuf[hbuf_length++] = '\n';
929 do {
930 h = snnprint_n(hbuf, hbuf_length, hbuf_size,
931 buf + name_start, value_end - name_start);
932 if(h < 0) RESIZE_HBUF();
933 } while(h < 0);
934 hbuf_length = h;
936 } else if(name == atomTrailer || name == atomUpgrade) {
937 do_log(L_ERROR, "Trailers or upgrade present.\n");
938 goto fail;
939 } else if(name == atomDate || name == atomExpires ||
940 name == atomIfModifiedSince ||
941 name == atomIfUnmodifiedSince ||
942 name == atomLastModified ||
943 name == atomXPolipoDate || name == atomXPolipoAccess) {
944 time_t t;
945 j = parse_time(buf, value_start, value_end, &t);
946 if(j < 0) {
947 if(name != atomExpires) {
948 do_log(L_WARN, "Couldn't parse %s: ", name->string);
949 do_log_n(L_WARN, buf + value_start,
950 value_end - value_start);
951 do_log(L_WARN, "\n");
953 t = -1;
955 if(name == atomDate) {
956 if(t >= 0)
957 date = t;
958 } else if(name == atomExpires) {
959 if(t >= 0)
960 expires = t;
961 else
962 expires = 0;
963 } else if(name == atomLastModified)
964 last_modified = t;
965 else if(name == atomIfModifiedSince)
966 ims = t;
967 else if(name == atomIfUnmodifiedSince)
968 inms = t;
969 else if(name == atomXPolipoDate)
970 polipo_age = t;
971 else if(name == atomXPolipoAccess)
972 polipo_access = t;
973 } else if(name == atomAge) {
974 j = skipWhitespace(buf, value_start);
975 if(j < 0) {
976 age = -1;
977 } else {
978 age = strtol(buf + value_start, &endptr, 10);
979 if(endptr <= buf + value_start)
980 age = -1;
982 if(age < 0) {
983 do_log(L_WARN, "Couldn't parse age: \n");
984 do_log_n(L_WARN, buf + value_start, value_end - value_start);
985 do_log(L_WARN, " -- ignored.\n");
987 } else if(name == atomXPolipoBodyOffset) {
988 j = skipWhitespace(buf, value_start);
989 if(j < 0) {
990 do_log(L_ERROR, "Couldn't parse body offset.\n");
991 goto fail;
992 } else {
993 polipo_body_offset = strtol(buf + value_start, &endptr, 10);
994 if(endptr <= buf + value_start) {
995 do_log(L_ERROR, "Couldn't parse body offset.\n");
996 goto fail;
999 } else if(name == atomTransferEncoding) {
1000 if(token_compare(buf, value_start, value_end, "identity"))
1001 te = TE_IDENTITY;
1002 else if(token_compare(buf, value_start, value_end, "chunked"))
1003 te = TE_CHUNKED;
1004 else
1005 te = TE_UNKNOWN;
1006 } else if(name == atomETag ||
1007 name == atomIfNoneMatch || name == atomIfMatch ||
1008 name == atomIfRange) {
1009 int x, y;
1010 int weak;
1011 char *e;
1012 j = getNextETag(buf, value_start, &x, &y, &weak);
1013 if(j < 0) {
1014 if(buf[value_start] != '\r' && buf[value_start] != '\n')
1015 do_log(L_ERROR, "Couldn't parse ETag.\n");
1016 } else if(weak) {
1017 do_log(L_WARN, "Server returned weak ETag -- ignored.\n");
1018 } else {
1019 e = strdup_n(buf + x, y - x);
1020 if(e == NULL) goto fail;
1021 if(name == atomETag) {
1022 if(!etag)
1023 etag = e;
1024 else
1025 free(e);
1026 } else if(name == atomIfNoneMatch) {
1027 if(!inm)
1028 inm = e;
1029 else
1030 free(e);
1031 } else if(name == atomIfMatch) {
1032 if(!im)
1033 im = e;
1034 else
1035 free(e);
1036 } else if(name == atomIfRange) {
1037 if(!ifrange)
1038 ifrange = e;
1039 else
1040 free(e);
1041 } else {
1042 abort();
1045 } else if(name == atomCacheControl) {
1046 int v_start, v_end;
1047 j = getNextTokenInList(buf, value_start,
1048 &token_start, &token_end,
1049 &v_start, &v_end,
1050 &end);
1051 while(1) {
1052 if(j < 0) {
1053 do_log(L_WARN, "Couldn't parse Cache-Control.\n");
1054 cache_control.flags |= CACHE_NO;
1055 break;
1057 if(token_compare(buf, token_start, token_end, "no-cache")) {
1058 cache_control.flags |= CACHE_NO;
1059 } else if(token_compare(buf, token_start, token_end,
1060 "public")) {
1061 cache_control.flags |= CACHE_PUBLIC;
1062 } else if(token_compare(buf, token_start, token_end,
1063 "private")) {
1064 cache_control.flags |= CACHE_PRIVATE;
1065 } else if(token_compare(buf, token_start, token_end,
1066 "no-store")) {
1067 cache_control.flags |= CACHE_NO_STORE;
1068 } else if(token_compare(buf, token_start, token_end,
1069 "no-transform")) {
1070 cache_control.flags |= CACHE_NO_TRANSFORM;
1071 } else if(token_compare(buf, token_start, token_end,
1072 "must-revalidate") ||
1073 token_compare(buf, token_start, token_end,
1074 "must-validate")) { /* losers */
1075 cache_control.flags |= CACHE_MUST_REVALIDATE;
1076 } else if(token_compare(buf, token_start, token_end,
1077 "proxy-revalidate")) {
1078 cache_control.flags |= CACHE_PROXY_REVALIDATE;
1079 } else if(token_compare(buf, token_start, token_end,
1080 "only-if-cached")) {
1081 cache_control.flags |= CACHE_ONLY_IF_CACHED;
1082 } else if(token_compare(buf, token_start, token_end,
1083 "max-age") ||
1084 token_compare(buf, token_start, token_end,
1085 "maxage")) { /* losers */
1086 int a;
1087 if(v_start <= 0 || !digit(buf[v_start])) {
1088 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1089 do_log_n(L_WARN, buf + token_start,
1090 (v_end >= 0 ? v_end : token_end) -
1091 token_start);
1092 do_log(L_WARN, "\n");
1094 a = atoi(buf + v_start);
1095 cache_control.max_age = a;
1096 } else if(token_compare(buf, token_start, token_end,
1097 "s-maxage")) {
1098 int a;
1099 if(v_start <= 0 || !digit(buf[v_start])) {
1100 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1101 do_log_n(L_WARN, buf + token_start,
1102 (v_end >= 0 ? v_end : token_end) -
1103 token_start);
1104 do_log(L_WARN, "\n");
1106 a = atoi(buf + v_start);
1107 cache_control.max_age = a;
1108 } else if(token_compare(buf, token_start, token_end,
1109 "min-fresh")) {
1110 int a;
1111 if(v_start <= 0 || !digit(buf[v_start])) {
1112 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1113 do_log_n(L_WARN, buf + token_start,
1114 (v_end >= 0 ? v_end : token_end) -
1115 token_start);
1116 do_log(L_WARN, "\n");
1118 a = atoi(buf + v_start);
1119 cache_control.max_age = a;
1120 } else if(token_compare(buf, token_start, token_end,
1121 "max-stale")) {
1122 int a;
1123 if(v_start <= 0 || !digit(buf[v_start])) {
1124 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1125 do_log_n(L_WARN, buf + token_start,
1126 (v_end >= 0 ? v_end : token_end) -
1127 token_start);
1128 do_log(L_WARN, "\n");
1130 a = atoi(buf + v_start);
1131 cache_control.max_stale = a;
1132 } else {
1133 do_log(L_WARN, "Unsupported Cache-Control directive ");
1134 do_log_n(L_WARN, buf + token_start,
1135 (v_end >= 0 ? v_end : token_end) - token_start);
1136 do_log(L_WARN, " -- ignored.\n");
1138 if(end)
1139 break;
1140 j = getNextTokenInList(buf, j,
1141 &token_start, &token_end,
1142 &v_start, &v_end,
1143 &end);
1145 } else if(name == atomContentRange) {
1146 if(!client) {
1147 j = parseContentRange(buf, value_start,
1148 &content_range.from, &content_range.to,
1149 &content_range.full_length);
1150 if(j < 0) {
1151 do_log(L_ERROR, "Couldn't parse Content-Range: ");
1152 do_log_n(L_ERROR, buf + value_start,
1153 value_end - value_start);
1154 do_log(L_ERROR, "\n");
1155 goto fail;
1157 } else {
1158 do_log(L_ERROR, "Content-Range from client.\n");
1159 goto fail;
1161 } else if(name == atomRange) {
1162 if(client) {
1163 j = parseRange(buf, value_start, &range.from, &range.to);
1164 if(j < 0) {
1165 do_log(L_WARN, "Couldn't parse Range -- ignored.\n");
1166 range.from = -1;
1167 range.to = -1;
1169 } else {
1170 do_log(L_WARN, "Range from server -- ignored\n");
1172 } else if(name == atomXPolipoLocation) {
1173 if(location_return) {
1174 location =
1175 strdup_n(buf + value_start, value_end - value_start);
1176 if(location == NULL) {
1177 do_log(L_ERROR, "Couldn't allocate location.\n");
1178 goto fail;
1181 } else if(name == atomVia) {
1182 if(via_return) {
1183 AtomPtr new_via, full_via;
1184 new_via =
1185 internAtomN(buf + value_start, value_end - value_start);
1186 if(new_via == NULL) {
1187 do_log(L_ERROR, "Couldn't allocate via.\n");
1188 goto fail;
1190 if(via) {
1191 full_via =
1192 internAtomF("%s, %s", via->string, new_via->string);
1193 releaseAtom(new_via);
1194 if(full_via == NULL) {
1195 do_log(L_ERROR, "Couldn't allocate via");
1196 goto fail;
1198 releaseAtom(via);
1199 via = full_via;
1200 } else {
1201 via = new_via;
1204 } else if(name == atomExpect) {
1205 if(expect_return) {
1206 expect = internAtomLowerN(buf + value_start,
1207 value_end - value_start);
1208 if(expect == NULL) {
1209 do_log(L_ERROR, "Couldn't allocate expect.\n");
1210 goto fail;
1213 } else {
1214 if(!client && name == atomContentType) {
1215 if(token_compare(buf, value_start, value_end,
1216 "multipart/byteranges")) {
1217 do_log(L_ERROR,
1218 "Server returned multipart/byteranges -- yuck!\n");
1219 goto fail;
1222 if(name == atomVary) {
1223 if(!token_compare(buf, value_start, value_end, "host") &&
1224 !token_compare(buf, value_start, value_end, "*")) {
1225 /* What other vary headers should be ignored? */
1226 do_log(L_VARY, "Vary header present (");
1227 do_log_n(L_VARY,
1228 buf + value_start, value_end - value_start);
1229 do_log(L_VARY, ").\n");
1231 cache_control.flags |= CACHE_VARY;
1232 } else if(name == atomAuthorization) {
1233 cache_control.flags |= CACHE_AUTHORIZATION;
1236 if(name == atomPragma) {
1237 /* Pragma is only defined for the client, and the only
1238 standard value is no-cache (RFC 1945, 10.12).
1239 However, we honour a Pragma: no-cache for both the client
1240 and the server when there's no Cache-Control header. In
1241 all cases, we pass the Pragma header to the next hop. */
1242 if(!haveCacheControl) {
1243 j = getNextTokenInList(buf, value_start,
1244 &token_start, &token_end, NULL, NULL,
1245 &end);
1246 while(1) {
1247 if(j < 0) {
1248 do_log(L_WARN, "Couldn't parse Pragma.\n");
1249 cache_control.flags |= CACHE_NO;
1250 break;
1252 if(token_compare(buf, token_start, token_end,
1253 "no-cache"))
1254 cache_control.flags = CACHE_NO;
1255 if(end)
1256 break;
1257 j = getNextTokenInList(buf, j, &token_start, &token_end,
1258 NULL, NULL, &end);
1262 if(!client &&
1263 (name == atomSetCookie ||
1264 name == atomCookie || name == atomCookie2))
1265 cache_control.flags |= CACHE_COOKIE;
1267 if(hbuf) {
1268 if(name != atomConnection && name != atomHost &&
1269 name != atomAcceptRange && name != atomTE &&
1270 name != atomProxyAuthenticate &&
1271 name != atomKeepAlive &&
1272 (!hopToHop || !atomListMember(name, hopToHop)) &&
1273 !atomListMember(name, censoredHeaders)) {
1274 int h;
1275 while(hbuf_length > hbuf_size - 2)
1276 RESIZE_HBUF();
1277 hbuf[hbuf_length++] = '\r';
1278 hbuf[hbuf_length++] = '\n';
1279 do {
1280 h = snnprint_n(hbuf, hbuf_length, hbuf_size,
1281 buf + name_start,
1282 value_end - name_start);
1283 if(h < 0) RESIZE_HBUF();
1284 } while(h < 0);
1285 hbuf_length = h;
1289 releaseAtom(name);
1290 name = NULL;
1293 if(headers_return) {
1294 AtomPtr pheaders = NULL;
1295 pheaders = internAtomN(hbuf, hbuf_length);
1296 if(!pheaders)
1297 goto fail;
1298 *headers_return = pheaders;
1300 if(hbuf != hbuf_small)
1301 free(hbuf);
1302 hbuf = NULL;
1303 hbuf_size = 0;
1305 if(request)
1306 if(!persistent)
1307 request->flags &= ~REQUEST_PERSISTENT;
1309 if(te != TE_IDENTITY) len = -1;
1310 if(len_return) *len_return = len;
1311 if(cache_control_return) *cache_control_return = cache_control;
1312 if(condition_return) {
1313 if(ims >= 0 || inms >= 0 || im || inm || ifrange) {
1314 condition = httpMakeCondition();
1315 if(condition) {
1316 condition->ims = ims;
1317 condition->inms = inms;
1318 condition->im = im;
1319 condition->inm = inm;
1320 condition->ifrange = ifrange;
1321 } else {
1322 do_log(L_ERROR, "Couldn't allocate condition.\n");
1323 if(im) free(im);
1324 if(inm) free(inm);
1326 } else {
1327 condition = NULL;
1329 *condition_return = condition;
1330 } else {
1331 assert(!im && !inm);
1334 if(te_return) *te_return = te;
1335 if(date_return) *date_return = date;
1336 if(last_modified_return) *last_modified_return = last_modified;
1337 if(expires_return) *expires_return = expires;
1338 if(polipo_age_return) *polipo_age_return = polipo_age;
1339 if(polipo_access_return) *polipo_access_return = polipo_access;
1340 if(polipo_body_offset_return)
1341 *polipo_body_offset_return = polipo_body_offset;
1342 if(age_return) *age_return = age;
1343 if(etag_return)
1344 *etag_return = etag;
1345 else {
1346 if(etag) free(etag);
1348 if(range_return) *range_return = range;
1349 if(content_range_return) *content_range_return = content_range;
1350 if(location_return) {
1351 *location_return = location;
1352 } else {
1353 if(location)
1354 free(location);
1356 if(via_return)
1357 *via_return = via;
1358 else {
1359 if(via)
1360 releaseAtom(via);
1362 if(expect_return)
1363 *expect_return = expect;
1364 else {
1365 if(expect)
1366 releaseAtom(expect);
1368 if(auth_return)
1369 *auth_return = auth;
1370 else {
1371 if(auth)
1372 releaseAtom(auth);
1374 if(hopToHop) destroyAtomList(hopToHop);
1375 return i;
1377 fail:
1378 if(hbuf && hbuf != hbuf_small) free(hbuf);
1379 if(name) releaseAtom(name);
1380 if(etag) free(etag);
1381 if(location) free(location);
1382 if(via) releaseAtom(via);
1383 if(expect) releaseAtom(expect);
1384 if(auth) releaseAtom(auth);
1385 if(hopToHop) destroyAtomList(hopToHop);
1387 return -1;
1388 #undef RESIZE_HBUF
1392 httpFindHeader(AtomPtr header, const char *headers, int hlen,
1393 int *value_begin_return, int *value_end_return)
1395 int len = header->length;
1396 int i = 0;
1398 while(i + len + 1 < hlen) {
1399 if(headers[i + len] == ':' &&
1400 lwrcmp(headers + i, header->string, len) == 0) {
1401 int j = i + len + 1, k;
1402 while(j < hlen && headers[j] == ' ')
1403 j++;
1404 k = j;
1405 while(k < hlen && headers[k] != '\n' && headers[k] != '\r')
1406 k++;
1407 *value_begin_return = j;
1408 *value_end_return = k;
1409 return 1;
1410 } else {
1411 while(i < hlen && headers[i] != '\n' && headers[i] != '\r')
1412 i++;
1413 i++;
1414 if(i < hlen && headers[i] == '\n')
1415 i++;
1418 return 0;
/*
 * Split an "http://" URL into its host and port parts.
 *
 * url/len       the URL text; it is not required to be NUL-terminated.
 * x_return      receives the index of the first host byte (always 7 for
 *               an http URL), or -1 if the URL does not start with
 *               "http://" (compared with lwrcmp()).
 * y_return      receives the index one past the host, or -1 likewise.
 *               A host starting with '[' is scanned as a bracketed
 *               literal (RFC 2732) and includes the closing ']'.
 * port_return   receives the port parsed by atoi_n(), 80 when no port
 *               is given or it cannot be parsed, or -1 for a non-http
 *               URL.
 * z_return      receives the index at which scanning stopped: after the
 *               port digits when a port was parsed, at the end of the
 *               host otherwise, and 0 for a non-http URL.
 *
 * Always returns 0.
 */
int
parseUrl(const char *url, int len,
         int *x_return, int *y_return, int *port_return, int *z_return)
{
    int host_start = -1, host_end = -1, port = -1;
    int pos = 0;

    if(len >= 7 && lwrcmp(url, "http://", 7) == 0) {
        host_start = 7;
        pos = host_start;

        if(pos < len && url[pos] == '[') {
            /* Bracketed literal, RFC 2732: accept ':', letters and
               digits up to and including the closing bracket. */
            pos++;
            while(pos < len) {
                if(url[pos] == ']') {
                    pos++;
                    break;
                }
                if((url[pos] != ':') && !letter(url[pos]) &&
                   !digit(url[pos]))
                    break;
                pos++;
            }
        } else {
            /* Ordinary host: runs up to the port or the path. */
            while(pos < len && url[pos] != ':' && url[pos] != '/')
                pos++;
        }
        host_end = pos;

        if(pos < len && url[pos] == ':') {
            int after_port = atoi_n(url, pos + 1, len, &port);
            if(after_port < 0)
                port = 80;      /* unparsable port: fall back to 80 */
            else
                pos = after_port;
        } else {
            port = 80;
        }
    }

    *x_return = host_start;
    *y_return = host_end;
    *port_return = port;
    *z_return = pos;
    return 0;
}
/*
 * Tell whether a URL is local, i.e. is non-empty and begins with '/'.
 * Returns 1 if so, 0 otherwise.  Only url[0] is ever read, and only
 * when len is positive.
 */
int
urlIsLocal(const char *url, int len)
{
    if(len <= 0)
        return 0;

    return url[0] == '/';
}
/*
 * Tell whether a URL lies under the "/polipo/" prefix.  The comparison
 * is an exact byte-wise one.  Returns 1 if the first 8 bytes of url are
 * "/polipo/", 0 otherwise (including when len is less than 8).
 */
int
urlIsSpecial(const char *url, int len)
{
    static const char prefix[] = "/polipo/";
    const int prefix_len = (int)sizeof(prefix) - 1;

    if(len < prefix_len)
        return 0;

    return memcmp(url, prefix, prefix_len) == 0;
}
1484 parseChunkSize(const char *restrict buf, int i, int end,
1485 int *chunk_size_return)
1487 int v, d;
1488 v = h2i(buf[i]);
1489 if(v < 0)
1490 return -1;
1492 i++;
1494 while(i < end) {
1495 d = h2i(buf[i]);
1496 if(d < 0)
1497 break;
1498 v = v * 16 + d;
1499 i++;
1502 while(i < end) {
1503 if(buf[i] == ' ' || buf[i] == '\t')
1504 i++;
1505 else
1506 break;
1509 if(i >= end - 1)
1510 return 0;
1512 if(buf[i] != '\r' || buf[i + 1] != '\n')
1513 return -1;
1515 i += 2;
1517 if(v == 0) {
1518 if(i >= end - 1)
1519 return 0;
1520 if(buf[i] != '\r') {
1521 do_log(L_ERROR, "Trailers present!\n");
1522 return -1;
1524 i++;
1525 if(buf[i] != '\n')
1526 return -1;
1527 i++;
1530 *chunk_size_return = v;
1531 return i;
1536 checkVia(AtomPtr name, AtomPtr via)
1538 int i;
1539 char *v;
1540 if(via == NULL || via->length == 0)
1541 return 1;
1543 v = via->string;
1545 i = 0;
1546 while(i < via->length) {
1547 while(v[i] == ' ' || v[i] == '\t' || v[i] == ',' ||
1548 v[i] == '\r' || v[i] == '\n' ||
1549 digit(v[i]) || v[i] == '.')
1550 i++;
1551 if(i + name->length > via->length)
1552 break;
1553 if(memcmp(v + i, name->string, name->length) == 0) {
1554 char c = v[i + name->length];
1555 if(c == '\0' || c == ' ' || c == '\t' || c == ',' ||
1556 c == '\r' || c == '\n')
1557 return 0;
1559 i++;
1560 while(letter(v[i]) || digit(v[i]) || v[i] == '.')
1561 i++;
1563 return 1;