Update CHANGES; fix a bogus entry, and mention Jake's work.
[polipo.git] / http_parse.c
blobc6d952e71359d1c74404c19e1e68ba9765658399
1 /*
2 Copyright (c) 2003-2006 by Juliusz Chroboczek
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
#include "polipo.h"
#include <limits.h>
/* Forward declarations of tokenizers that are defined further down in
   this file. */
25 static int getNextWord(const char *buf, int i, int *x_return, int *y_return);
26 static int getNextToken(const char *buf, int i, int *x_return, int *y_return);
27 static int getNextTokenInList(const char *buf, int i,
28 int *x_return, int *y_return,
29 int *z_return, int *t_return,
30 int *end_return);
/* Atoms for the header names this parser recognises.  initHttpParser()
   interns each of them from its lower-case spelling, and header names are
   compared against them by pointer equality. */
32 static AtomPtr atomConnection, atomProxyConnection, atomContentLength,
33 atomHost, atomAcceptRange, atomTE,
34 atomReferer, atomProxyAuthenticate, atomProxyAuthorization,
35 atomKeepAlive, atomTrailer, atomUpgrade, atomDate, atomExpires,
36 atomIfModifiedSince, atomIfUnmodifiedSince, atomIfRange, atomLastModified,
37 atomIfMatch, atomIfNoneMatch, atomAge, atomTransferEncoding,
38 atomETag, atomCacheControl, atomPragma, atomContentRange, atomRange,
39 atomVia, atomVary, atomExpect, atomAuthorization,
40 atomSetCookie, atomCookie, atomCookie2,
41 atomXPolipoDate, atomXPolipoAccess, atomXPolipoLocation,
42 atomXPolipoBodyOffset;
/* Interned by initHttpParser() as well, but with external linkage. */
44 AtomPtr atomContentType, atomContentEncoding;
/* Registered as a settable tristate in preinitHttpParser().  In
   httpParseHeaders() a Referer header is kept when this is 0, kept only
   for a same-host URL when it is 1, and dropped otherwise. */
46 int censorReferer = 0;
/* When non-zero, httpParseHeaders() skips header lines it cannot parse
   (after a warning) instead of failing the whole parse. */
47 int laxHttpParser = 1;
/* Header names that httpParseHeaders() leaves out of the header block it
   builds; allocated and registered in preinitHttpParser(). */
49 static AtomListPtr censoredHeaders;
51 void
52 preinitHttpParser()
54 CONFIG_VARIABLE_SETTABLE(censorReferer, CONFIG_TRISTATE, configIntSetter,
55 "Censor referer headers.");
56 censoredHeaders = makeAtomList(NULL, 0);
57 if(censoredHeaders == NULL) {
58 do_log(L_ERROR, "Couldn't allocate censored atoms.\n");
59 exit(1);
61 CONFIG_VARIABLE(censoredHeaders, CONFIG_ATOM_LIST_LOWER,
62 "Headers to censor.");
63 CONFIG_VARIABLE_SETTABLE(laxHttpParser, CONFIG_BOOLEAN, configIntSetter,
64 "Ignore unknown HTTP headers.");
67 void
68 initHttpParser()
70 #define A(name, value) name = internAtom(value); if(!name) goto fail;
71 /* These must be in lower-case */
72 A(atomConnection, "connection");
73 A(atomProxyConnection, "proxy-connection");
74 A(atomContentLength, "content-length");
75 A(atomHost, "host");
76 A(atomAcceptRange, "accept-range");
77 A(atomTE, "te");
78 A(atomReferer, "referer");
79 A(atomProxyAuthenticate, "proxy-authenticate");
80 A(atomProxyAuthorization, "proxy-authorization");
81 A(atomKeepAlive, "keep-alive");
82 A(atomTrailer, "trailer");
83 A(atomUpgrade, "upgrade");
84 A(atomDate, "date");
85 A(atomExpires, "expires");
86 A(atomIfModifiedSince, "if-modified-since");
87 A(atomIfUnmodifiedSince, "if-unmodified-since");
88 A(atomIfRange, "if-range");
89 A(atomLastModified, "last-modified");
90 A(atomIfMatch, "if-match");
91 A(atomIfNoneMatch, "if-none-match");
92 A(atomAge, "age");
93 A(atomTransferEncoding, "transfer-encoding");
94 A(atomETag, "etag");
95 A(atomCacheControl, "cache-control");
96 A(atomPragma, "pragma");
97 A(atomContentRange, "content-range");
98 A(atomRange, "range");
99 A(atomVia, "via");
100 A(atomContentType, "content-type");
101 A(atomContentEncoding, "content-encoding");
102 A(atomVary, "vary");
103 A(atomExpect, "expect");
104 A(atomAuthorization, "authorization");
105 A(atomSetCookie, "set-cookie");
106 A(atomCookie, "cookie");
107 A(atomCookie2, "cookie2");
108 A(atomXPolipoDate, "x-polipo-date");
109 A(atomXPolipoAccess, "x-polipo-access");
110 A(atomXPolipoLocation, "x-polipo-location");
111 A(atomXPolipoBodyOffset, "x-polipo-body-offset");
112 #undef A
113 return;
115 fail:
116 do_log(L_ERROR, "Couldn't allocate atom.\n");
117 exit(1);
/* Locate the next space-delimited word at or after buf[i].
   Leading spaces (only ' ', not tabs) are skipped.  The word is the run
   of characters with codes strictly between 32 and 127; it may be empty.
   On success stores the word's bounds [*x_return, *y_return) and
   returns 0.  Returns -1, leaving the outputs untouched, if the line ends
   ('\n' or '\r') before any word starts. */
static int
getNextWord(const char *restrict buf, int i, int *x_return, int *y_return)
{
    int first, last;

    for(; buf[i] == ' '; i++)
        ;
    if(buf[i] == '\r' || buf[i] == '\n')
        return -1;

    first = i;
    for(last = first; buf[last] > 32 && buf[last] < 127; last++)
        ;

    *x_return = first;
    *y_return = last;
    return 0;
}
/* Skip a parenthesised comment.  buf[i] must be the opening '('.
   Returns the index just past the closing ')', or -1 if the line ends
   inside the comment.  A backslash-escaped ')' does not close the
   comment, and a line break followed by a space or tab (a folded line)
   is treated as part of the comment. */
static int
skipComment(const char *restrict buf, int i)
{
    assert(buf[i] == '(');

    i++;
    for(;;) {
        switch(buf[i]) {
        case ')':
            return i + 1;
        case '\\':
            /* Only "\)" is treated as an escape pair. */
            i += (buf[i + 1] == ')') ? 2 : 1;
            break;
        case '\n':
            if(buf[i + 1] != ' ' && buf[i + 1] != '\t')
                return -1;
            i += 2;
            break;
        case '\r':
            if(buf[i + 1] != '\n')
                return -1;
            if(buf[i + 2] != ' ' && buf[i + 2] != '\t')
                return -1;
            i += 3;
            break;
        default:
            i++;
            break;
        }
    }
}
/* Skip linear whitespace starting at buf[i]: spaces, tabs, parenthesised
   comments, and line breaks that are followed by a space or tab (folded
   lines).  Returns the index of the first character that is none of
   these, or -1 if a comment could not be skipped. */
static int
skipWhitespace(const char *restrict buf, int i)
{
    for(;;) {
        char c = buf[i];

        if(c == ' ' || c == '\t') {
            i++;
        } else if(c == '(') {
            i = skipComment(buf, i);
            if(i < 0)
                return -1;
        } else if(c == '\n' &&
                  (buf[i + 1] == ' ' || buf[i + 1] == '\t')) {
            i += 2;
        } else if(c == '\r' && buf[i + 1] == '\n' &&
                  (buf[i + 2] == ' ' || buf[i + 2] == '\t')) {
            i += 3;
        } else {
            /* Ordinary character, or a line break that really ends the
               line. */
            return i;
        }
    }
}
/* Find the next HTTP token at or after buf[i].
   Leading spaces and tabs, parenthesised comments, and folded line
   breaks (a line break followed by a space or tab) are skipped first.
   The token is the run of characters with codes strictly between 32 and
   127 that are not one of the separators listed in the switch below; it
   may be empty.
   On success stores the token's bounds [*x_return, *y_return) and
   returns *y_return, the index of the first character after the token.
   Returns -1 if the line ends before a token starts or inside a
   comment. */
static int
getNextToken(const char *restrict buf, int i, int *x_return, int *y_return)
{
    int x, y;

 again:
    while(buf[i] == ' ' || buf[i] == '\t')
        i++;

    if(buf[i] == '(') {
        /* Skip the comment by advancing the index; moving `buf' itself
           would make the offsets returned below relative to a shifted
           base. */
        i++;
        while(buf[i] != ')') {
            if(buf[i] == '\n' || buf[i] == '\r' || buf[i] == '\0')
                return -1;
            if(buf[i] == '\\' && buf[i + 1] != '\n' &&
               buf[i + 1] != '\r' && buf[i + 1] != '\0')
                i += 2;         /* quoted pair */
            else
                i++;
        }
        i++;                    /* step over the closing ')' */
        goto again;
    }

    if(buf[i] == '\n') {
        if(buf[i + 1] == ' ' || buf[i + 1] == '\t') {
            i += 2;
            goto again;
        } else {
            return -1;
        }
    }

    if(buf[i] == '\r') {
        if(buf[i + 1] == '\n' && (buf[i + 2] == ' ' || buf[i + 2] == '\t')) {
            i += 3;
            goto again;
        } else {
            return -1;
        }
    }

    x = i;
    while(buf[i] > 32 && buf[i] < 127) {
        switch(buf[i]) {
        case '(': case ')': case '<': case '>': case '@':
        case ',': case ';': case ':': case '\\': case '/':
        case '[': case ']': case '?': case '=':
        case '{': case '}': case ' ': case '\t':
            goto out;
        default:
            i++;
        }
    }
 out:
    y = i;

    *x_return = x;
    *y_return = y;

    return y;
}
/* Parse an entity tag of the form ["W/"] '"' text '"' at or after buf[i],
   skipping leading spaces and tabs.
   On success stores the bounds of the text between the quotes in
   [*x_return, *y_return), stores 1 in *weak_return if the "W/" prefix
   was present and 0 otherwise, and returns the index just past the
   closing quote.  Returns -1 if there is no opening quote or the line
   ends before the closing quote. */
static int
getNextETag(const char * restrict buf, int i,
            int *x_return, int *y_return, int *weak_return)
{
    int is_weak;
    int text_start, text_end;

    while(buf[i] == ' ' || buf[i] == '\t')
        i++;

    is_weak = (buf[i] == 'W' && buf[i + 1] == '/');
    if(is_weak)
        i += 2;

    if(buf[i] != '"')
        return -1;
    text_start = i + 1;

    for(text_end = text_start; buf[text_end] != '"'; text_end++) {
        if(buf[text_end] == '\r' || buf[text_end] == '\n')
            return -1;
    }

    *x_return = text_start;
    *y_return = text_end;
    *weak_return = is_weak;
    return text_end + 1;
}
/* Parse one element, `token' or `token = value', of a comma-separated
   header list starting at buf[i].
   Outputs:
     [*x_return, *y_return)  bounds of the token;
     [*z_return, *t_return)  bounds of the value with trailing blanks
                             removed, or -1/-1 if there is no '=';
                             either pointer may be NULL;
     *end_return             1 if the element was the last on its line
                             and the line is not continued, 0 otherwise.
   Returns the index at which the next element starts (or the index just
   past the end of the line), or -1 on a syntax error. */
static int
getNextTokenInList(const char *restrict buf, int i,
                   int *x_return, int *y_return,
                   int *z_return, int *t_return,
                   int *end_return)
{
    int j, x, y, z = -1, t = -1, end;

    j = getNextToken(buf, i, &x, &y);
    if(j < 0)
        return -1;
    while(buf[j] == ' ' || buf[j] == '\t')
        j++;

    if(buf[j] == '=') {
        j++;
        while(buf[j] == ' ' || buf[j] == '\t')
            j++;
        z = j;
        while(buf[j] != ',' && buf[j] != '\n' && buf[j] != '\r')
            j++;
        /* Record where the value ends; without this *t_return would
           always be -1 even though a value was found. */
        t = j;
        while(t > z && (buf[t - 1] == ' ' || buf[t - 1] == '\t'))
            t--;
    }

    if(buf[j] == '\n' || buf[j] == '\r') {
        if(buf[j] == '\r') {
            if(buf[j + 1] != '\n')
                return -1;
            j += 2;
        } else
            j++;
        end = 1;
        /* A line break followed by blanks is a folded line: the list
           continues. */
        if(buf[j] == ' ' || buf[j] == '\t') {
            while(buf[j] == ' ' || buf[j] == '\t')
                j++;
            end = 0;
        }
    } else if(buf[j] == ',') {
        j++;
        while(buf[j] == ' ' || buf[j] == '\t')
            j++;
        end = 0;
    } else {
        return -1;
    }

    *x_return = x;
    *y_return = y;
    if(z_return)
        *z_return = z;
    if(t_return)
        *t_return = t;
    *end_return = end;
    return j;
}
/* Return 1 if strcasecmp_n() reports that the string s equals the text
   buf[start..end), and 0 otherwise. */
static inline int
token_compare(const char *buf, int start, int end, const char *s)
{
    const char *token = buf + start;
    int token_length = end - start;

    if(strcasecmp_n(s, token, token_length) != 0)
        return 0;
    return 1;
}
/* Expect an end of line at buf[i], optionally preceded by spaces.
   Returns the index of the first character of the next line, or -1 if
   something other than spaces precedes the line break or a '\r' is not
   followed by '\n'. */
static int
skipEol(const char *restrict buf, int i)
{
    for(; buf[i] == ' '; i++)
        ;

    switch(buf[i]) {
    case '\n':
        return i + 1;
    case '\r':
        return (buf[i + 1] == '\n') ? i + 2 : -1;
    default:
        return -1;
    }
}
/* Advance from buf[i] to the next line break.
   On success stores the index of the line break itself ('\n', or the
   '\r' of a "\r\n" pair) in *start_return and returns the index of the
   first character of the following line.  Returns -1, leaving
   *start_return untouched, if a '\r' is not followed by '\n'. */
static int
skipToEol(const char *restrict buf, int i, int *start_return)
{
    int break_length;

    while(buf[i] != '\n' && buf[i] != '\r')
        i++;

    if(buf[i] == '\n')
        break_length = 1;
    else if(buf[i + 1] == '\n')     /* buf[i] is '\r' here */
        break_length = 2;
    else
        return -1;

    *start_return = i;
    return i + break_length;
}
/* Delimit a header value that begins at or after buf[start].
   Leading spaces and tabs are skipped.  The value extends over folded
   lines: whenever the line after a line break begins with a space or
   tab, scanning continues on that line.
   On success stores the value's first index in *value_start_return and
   the index of the final line break in *value_end_return, and returns
   the index of the first character after that line break.  Returns -1
   if skipToEol() fails. */
static int
getHeaderValue(const char *restrict buf, int start,
               int *value_start_return, int *value_end_return)
{
    int pos, next_line, line_break;

    while(buf[start] == ' ' || buf[start] == '\t')
        start++;

    pos = start;
    for(;;) {
        next_line = skipToEol(buf, pos, &line_break);
        if(next_line < 0)
            return -1;
        if(buf[next_line] != ' ' && buf[next_line] != '\t')
            break;
        /* Folded line: keep scanning after the leading blank. */
        pos = next_line + 1;
    }

    *value_start_return = start;
    *value_end_return = line_break;
    return next_line;
}
395 httpParseClientFirstLine(const char *restrict buf, int offset,
396 int *method_return,
397 AtomPtr *url_return,
398 int *version_return)
400 int i = 0;
401 int x, y;
402 int method;
403 AtomPtr url;
404 int version = HTTP_UNKNOWN;
405 int eol;
407 i = offset;
408 i = getNextWord(buf, i, &x, &y);
409 if(i < 0) return -1;
410 if(y == x + 3 && memcmp(buf + x, "GET", 3) == 0)
411 method = METHOD_GET;
412 else if(y == x + 4 && memcmp(buf + x, "HEAD", 4) == 0)
413 method = METHOD_HEAD;
414 else if(y == x + 4 && memcmp(buf + x, "POST", 4) == 0)
415 method = METHOD_POST;
416 else if(y == x + 3 && memcmp(buf + x, "PUT", 3) == 0)
417 method = METHOD_PUT;
418 else if(y == x + 7 && memcmp(buf + x, "CONNECT", 7) == 0)
419 method = METHOD_CONNECT;
420 else
421 method = METHOD_UNKNOWN;
423 i = getNextWord(buf, y + 1, &x, &y);
424 if(i < 0) return -1;
426 url = internAtomN(buf + x, y - x);
428 i = getNextWord(buf, y + 1, &x, &y);
429 if(i < 0) {
430 releaseAtom(url);
431 return -1;
434 if(y == x + 8) {
435 if(memcmp(buf + x, "HTTP/1.", 7) != 0)
436 version = HTTP_UNKNOWN;
437 else if(buf[x + 7] == '0')
438 version = HTTP_10;
439 else if(buf[x + 7] >= '1' && buf[x + 7] <= '9')
440 version = HTTP_11;
441 else
442 version = HTTP_UNKNOWN;
445 eol = skipEol(buf, y);
446 if(eol < 0) return -1;
448 *method_return = method;
449 if(url_return)
450 *url_return = url;
451 else
452 releaseAtom(url);
453 *version_return = version;
454 return eol;
458 httpParseServerFirstLine(const char *restrict buf,
459 int *status_return,
460 int *version_return,
461 AtomPtr *message_return)
463 int i = 0;
464 int x, y, eol;
465 int status;
466 int version = HTTP_UNKNOWN;
468 i = getNextWord(buf, 0, &x, &y);
469 if(i < 0)
470 return -1;
471 if(y == x + 8 && memcmp(buf + x, "HTTP/1.0", 8) == 0)
472 version = HTTP_10;
473 else if(y >= x + 8 && memcmp(buf + x, "HTTP/1.", 7) == 0)
474 version = HTTP_11;
475 else
476 version = HTTP_UNKNOWN;
478 i = getNextWord(buf, y + 1, &x, &y);
479 if(i < 0) return -1;
480 if(y == x + 3)
481 status = atol(buf + x);
482 else return -1;
484 i = skipToEol(buf, y, &eol);
485 if(i < 0) return -1;
487 *status_return = status;
488 *version_return = version;
489 if(message_return) {
490 /* Netscape enterprise bug */
491 if(eol > y)
492 *message_return = internAtomN(buf + y + 1, eol - y - 1);
493 else
494 *message_return = internAtom("No message");
496 return i;
/* Parse an unsigned decimal integer starting at buf[start].
   On success stores the value in *val_return and returns the index of
   the first character after the digits.  Returns -1, leaving
   *val_return untouched, if buf[start] is not a digit or if the value
   does not fit in an int. */
static int
parseInt(const char *restrict buf, int start, int *val_return)
{
    int i = start, val = 0;

    if(!digit(buf[i]))
        return -1;
    while(digit(buf[i])) {
        int d = buf[i] - '0';
        /* The digits come straight from a header, so refuse to overflow
           rather than run into undefined behaviour. */
        if(val > (INT_MAX - d) / 10)
            return -1;
        val = val * 10 + d;
        i++;
    }
    *val_return = val;
    return i;
}
/* Split the header line starting at buf[start] into a name and a value.
   Returns the index of the first character after the line (including
   any folded continuation lines of the value).
   *name_start_return is set to -1 when the line is empty, which marks
   the end of the headers, and to -2 when the line could not be parsed;
   in both cases the other outputs are left untouched.  Otherwise the
   four outputs receive the bounds of the name and of the value. */
static int
parseHeaderLine(const char *restrict buf, int start,
                int *name_start_return, int *name_end_return,
                int *value_start_return, int *value_end_return)
{
    int pos;
    int name_start, name_end, value_start, value_end;

    /* An empty line terminates the header block. */
    if(buf[start] == '\n') {
        *name_start_return = -1;
        return start + 1;
    }
    if(buf[start] == '\r' && buf[start + 1] == '\n') {
        *name_start_return = -1;
        return start + 2;
    }

    pos = getNextToken(buf, start, &name_start, &name_end);
    if(pos >= 0 && buf[pos] == ':') {
        pos++;
        while(buf[pos] == ' ' || buf[pos] == '\t')
            pos++;

        pos = getHeaderValue(buf, pos, &value_start, &value_end);
        if(pos >= 0) {
            *name_start_return = name_start;
            *name_end_return = name_end;
            *value_start_return = value_start;
            *value_end_return = value_end;
            return pos;
        }
    }

    /* Syntax error: resynchronise at the start of the next line. */
    for(pos = start; ; pos++) {
        if(buf[pos] == '\n') {
            pos += 1;
            break;
        }
        if(buf[pos] == '\r' && buf[pos + 1] == '\n') {
            pos += 2;
            break;
        }
    }
    *name_start_return = -2;
    return pos;
}
567 findEndOfHeaders(const char *restrict buf, int from, int to, int *body_return)
569 int i = from;
570 int eol = 0;
571 while(i < to) {
572 if(buf[i] == '\n') {
573 if(eol) {
574 *body_return = i + 1;
575 return eol;
577 eol = i;
578 i++;
579 } else if(buf[i] == '\r') {
580 if(i < to - 1 && buf[i + 1] == '\n') {
581 if(eol) {
582 *body_return = eol;
583 return i + 2;
585 eol = i;
586 i += 2;
587 } else {
588 eol = 0;
589 i++;
591 } else {
592 eol = 0;
593 i++;
596 return -1;
599 static int
600 parseContentRange(const char *restrict buf, int i,
601 int *from_return, int *to_return, int *full_len_return)
603 int j;
604 int from, to, full_len;
606 i = skipWhitespace(buf, i);
607 if(i < 0) return -1;
608 if(!token_compare(buf, i, i + 5, "bytes")) {
609 do_log(L_WARN, "Incorrect Content-Range header -- chugging along.\n");
610 } else {
611 i += 5;
613 i = skipWhitespace(buf, i);
614 if(buf[i] == '*') {
615 from = 0;
616 to = -1;
617 i++;
618 } else {
619 i = parseInt(buf, i, &from);
620 if(i < 0) return -1;
621 if(buf[i] != '-') return -1;
622 i++;
623 i = parseInt(buf, i, &to);
624 if(i < 0) return -1;
625 to = to + 1;
627 if(buf[i] != '/')
628 return -1;
629 i++;
630 if(buf[i] == '*')
631 full_len = -1;
632 else {
633 i = parseInt(buf, i, &full_len);
634 if(i < 0) return -1;
636 j = skipEol(buf, i);
637 if(j < 0)
638 return -1;
640 *from_return = from;
641 *to_return = to;
642 *full_len_return = full_len;
643 return i;
/* Parse the value of a Range header, "bytes=" [first] "-" [last], at
   buf[i].  Only a single range is accepted.
   On success stores:
     *from_return  the first byte offset, or 0 when it is omitted;
     *to_return    the last byte offset plus one, or -1 when it is
                   omitted or cannot be parsed;
   and returns the index just past the parsed value.  Returns -1 if the
   value is malformed or is not followed by an end of line. */
static int
parseRange(const char *restrict buf, int i,
           int *from_return, int *to_return)
{
    int j;
    int from, to;

    i = skipWhitespace(buf, i);
    if(i < 0)
        return -1;
    if(!token_compare(buf, i, i + 6, "bytes="))
        return -1;
    i += 6;

    i = skipWhitespace(buf, i);
    /* skipWhitespace() fails on a broken comment; without this check
       buf[-1] would be read below. */
    if(i < 0)
        return -1;

    if(buf[i] == '-') {
        from = 0;
    } else {
        i = parseInt(buf, i, &from);
        if(i < 0) return -1;
    }
    if(buf[i] != '-')
        return -1;
    i++;

    j = parseInt(buf, i, &to);
    if(j < 0)
        to = -1;
    else {
        /* `to' becomes an exclusive bound; make sure the + 1 fits. */
        if(to == INT_MAX)
            return -1;
        to = to + 1;
        i = j;
    }

    j = skipEol(buf, i);
    if(j < 0) return -1;
    *from_return = from;
    *to_return = to;
    return i;
}
/* Return 1 if two URLs name the same host, 0 otherwise.
   Both URLs must be at least seven characters long and have "://" at
   offsets 4..6, i.e. a four-letter scheme; the scheme itself is not
   compared.  The host parts, which run from offset 7 up to the first
   '/' or the end of the URL, are compared with the 0x20 bit of every
   character forced on, which makes ASCII letters compare without regard
   to case. */
static int
urlSameHost(const char *url1, int len1, const char *url2, int len2)
{
    int pos;
    int host1_done, host2_done;

    if(len1 < 7 || len2 < 7)
        return 0;
    if(memcmp(url1 + 4, "://", 3) != 0)
        return 0;
    if(memcmp(url2 + 4, "://", 3) != 0)
        return 0;

    for(pos = 7; pos < len1 && pos < len2; pos++) {
        if(url1[pos] == '/' || url2[pos] == '/')
            break;
        if((url1[pos] | 0x20) != (url2[pos] | 0x20))
            break;
    }

    /* The hosts match only if both of them ended where the scan stopped. */
    host1_done = (pos == len1 || url1[pos] == '/');
    host2_done = (pos == len2 || url2[pos] == '/');
    return host1_done && host2_done;
}
/* Double the capacity of a header buffer.
   hbuf is the current buffer and *size its capacity in bytes;
   hbuf_small is a buffer that was not obtained from malloc().  If hbuf
   is hbuf_small, a new block is allocated and the first *size bytes are
   copied into it; otherwise hbuf is grown with realloc().
   On success returns the new buffer and updates *size.  On failure
   (allocation failure, a non-positive size, or a size that cannot be
   doubled without overflowing an int) frees hbuf unless it is
   hbuf_small, sets *size to 0 and returns NULL. */
static char *
resize_hbuf(char *hbuf, int *size, char *hbuf_small)
{
    int new_size;
    char *new_hbuf;

    /* Check before multiplying: a signed overflow is undefined
       behaviour, so it cannot reliably be detected afterwards. */
    if(*size <= 0 || *size > INT_MAX / 2)
        goto fail;
    new_size = 2 * *size;

    if(hbuf == hbuf_small) {
        new_hbuf = malloc(new_size);
        if(new_hbuf == NULL) goto fail;
        memcpy(new_hbuf, hbuf, *size);
    } else {
        new_hbuf = realloc(hbuf, new_size);
        if(new_hbuf == NULL) goto fail;
    }
    *size = new_size;
    return new_hbuf;

 fail:
    if(hbuf != hbuf_small)
        free(hbuf);
    *size = 0;
    return NULL;
}
732 httpParseHeaders(int client, AtomPtr url,
733 const char *buf, int start, HTTPRequestPtr request,
734 AtomPtr *headers_return,
735 int *len_return, CacheControlPtr cache_control_return,
736 HTTPConditionPtr *condition_return, int *te_return,
737 time_t *date_return, time_t *last_modified_return,
738 time_t *expires_return, time_t *polipo_age_return,
739 time_t *polipo_access_return, int *polipo_body_offset_return,
740 int *age_return, char **etag_return, AtomPtr *expect_return,
741 HTTPRangePtr range_return, HTTPRangePtr content_range_return,
742 char **location_return, AtomPtr *via_return,
743 AtomPtr *auth_return)
745 int local = url ? urlIsLocal(url->string, url->length) : 0;
746 char hbuf_small[512];
747 char *hbuf = hbuf_small;
748 int hbuf_size = 512, hbuf_length = 0;
749 int i, j,
750 name_start, name_end, value_start, value_end,
751 token_start, token_end, end;
752 AtomPtr name = NULL;
753 time_t date = -1, last_modified = -1, expires = -1, polipo_age = -1,
754 polipo_access = -1, polipo_body_offset = -1;
755 int len = -1;
756 CacheControlRec cache_control;
757 char *endptr;
758 int te = TE_IDENTITY;
759 int age = -1;
760 char *etag = NULL, *ifrange = NULL;
761 int persistent = (!request || (request->connection->version != HTTP_10));
762 char *location = NULL;
763 AtomPtr via = NULL;
764 AtomPtr auth = NULL;
765 AtomPtr expect = NULL;
766 HTTPConditionPtr condition;
767 time_t ims = -1, inms = -1;
768 char *im = NULL, *inm = NULL;
769 AtomListPtr hopToHop = NULL;
770 HTTPRangeRec range = {-1, -1, -1}, content_range = {-1, -1, -1};
771 int haveCacheControl = 0;
773 #define RESIZE_HBUF() \
774 do { \
775 hbuf = resize_hbuf(hbuf, &hbuf_size, hbuf_small); \
776 if(hbuf == NULL) \
777 goto fail; \
778 } while(0)
780 cache_control.flags = 0;
781 cache_control.max_age = -1;
782 cache_control.s_maxage = -1;
783 cache_control.min_fresh = -1;
784 cache_control.max_stale = -1;
786 i = start;
788 while(1) {
789 i = parseHeaderLine(buf, i,
790 &name_start, &name_end, &value_start, &value_end);
791 if(i < 0) {
792 do_log(L_ERROR, "Couldn't find end of header line.\n");
793 goto fail;
796 if(name_start == -1)
797 break;
799 if(name_start < 0)
800 continue;
802 name = internAtomLowerN(buf + name_start, name_end - name_start);
804 if(name == atomConnection) {
805 j = getNextTokenInList(buf, value_start,
806 &token_start, &token_end, NULL, NULL,
807 &end);
808 while(1) {
809 if(j < 0) {
810 do_log(L_ERROR, "Couldn't parse Connection: ");
811 do_log_n(L_ERROR, buf + value_start,
812 value_end - value_start);
813 do_log(L_ERROR, ".\n");
814 goto fail;
816 if(token_compare(buf, token_start, token_end, "close")) {
817 persistent = 0;
818 } else if(token_compare(buf, token_start, token_end,
819 "keep-alive")) {
820 persistent = 1;
821 } else {
822 if(hopToHop == NULL)
823 hopToHop = makeAtomList(NULL, 0);
824 if(hopToHop == NULL) {
825 do_log(L_ERROR, "Couldn't allocate atom list.\n");
826 goto fail;
828 atomListCons(internAtomLowerN(buf + token_start,
829 token_end - token_start),
830 hopToHop);
832 if(end)
833 break;
834 j = getNextTokenInList(buf, j,
835 &token_start, &token_end, NULL, NULL,
836 &end);
838 } else if(name == atomCacheControl)
839 haveCacheControl = 1;
841 releaseAtom(name);
842 name = NULL;
845 i = start;
847 while(1) {
848 i = parseHeaderLine(buf, i,
849 &name_start, &name_end, &value_start, &value_end);
850 if(i < 0) {
851 do_log(L_ERROR, "Couldn't find end of header line.\n");
852 goto fail;
855 if(name_start == -1)
856 break;
858 if(name_start < 0) {
859 do_log(L_WARN, "Couldn't parse header line.\n");
860 if(laxHttpParser)
861 continue;
862 else
863 goto fail;
866 name = internAtomLowerN(buf + name_start, name_end - name_start);
868 if(name == atomProxyConnection) {
869 j = getNextTokenInList(buf, value_start,
870 &token_start, &token_end, NULL, NULL,
871 &end);
872 while(1) {
873 if(j < 0) {
874 do_log(L_WARN, "Couldn't parse Proxy-Connection:");
875 do_log_n(L_WARN, buf + value_start,
876 value_end - value_start);
877 do_log(L_WARN, ".\n");
878 persistent = 0;
879 break;
881 if(token_compare(buf, token_start, token_end, "close")) {
882 persistent = 0;
883 } else if(token_compare(buf, token_start, token_end,
884 "keep-alive")) {
885 persistent = 1;
887 if(end)
888 break;
889 j = getNextTokenInList(buf, j,
890 &token_start, &token_end, NULL, NULL,
891 &end);
893 } else if(name == atomContentLength) {
894 j = skipWhitespace(buf, value_start);
895 if(j < 0) {
896 do_log(L_WARN, "Couldn't parse Content-Length: \n");
897 do_log_n(L_WARN, buf + value_start, value_end - value_start);
898 do_log(L_WARN, ".\n");
899 len = -1;
900 } else {
901 errno = 0;
902 len = strtol(buf + value_start, &endptr, 10);
903 if(errno == ERANGE || endptr <= buf + value_start) {
904 do_log(L_WARN, "Couldn't parse Content-Length: \n");
905 do_log_n(L_WARN, buf + value_start,
906 value_end - value_start);
907 do_log(L_WARN, ".\n");
908 len = -1;
911 } else if((!local && name == atomProxyAuthorization) ||
912 (local && name == atomAuthorization)) {
913 if(auth_return) {
914 auth = internAtomN(buf + value_start, value_end - value_start);
915 if(auth == NULL) {
916 do_log(L_ERROR, "Couldn't allocate authorization.\n");
917 goto fail;
920 } else if(name == atomReferer) {
921 int h;
922 if(censorReferer == 0 ||
923 (censorReferer == 1 && url != NULL &&
924 urlSameHost(url->string, url->length,
925 buf + value_start, value_end - value_start))) {
926 while(hbuf_length > hbuf_size - 2)
927 RESIZE_HBUF();
928 hbuf[hbuf_length++] = '\r';
929 hbuf[hbuf_length++] = '\n';
930 do {
931 h = snnprint_n(hbuf, hbuf_length, hbuf_size,
932 buf + name_start, value_end - name_start);
933 if(h < 0) RESIZE_HBUF();
934 } while(h < 0);
935 hbuf_length = h;
937 } else if(name == atomTrailer || name == atomUpgrade) {
938 do_log(L_ERROR, "Trailers or upgrade present.\n");
939 goto fail;
940 } else if(name == atomDate || name == atomExpires ||
941 name == atomIfModifiedSince ||
942 name == atomIfUnmodifiedSince ||
943 name == atomLastModified ||
944 name == atomXPolipoDate || name == atomXPolipoAccess) {
945 time_t t;
946 j = parse_time(buf, value_start, value_end, &t);
947 if(j < 0) {
948 if(name != atomExpires) {
949 do_log(L_WARN, "Couldn't parse %s: ", name->string);
950 do_log_n(L_WARN, buf + value_start,
951 value_end - value_start);
952 do_log(L_WARN, "\n");
954 t = -1;
956 if(name == atomDate) {
957 if(t >= 0)
958 date = t;
959 } else if(name == atomExpires) {
960 if(t >= 0)
961 expires = t;
962 else
963 expires = 0;
964 } else if(name == atomLastModified)
965 last_modified = t;
966 else if(name == atomIfModifiedSince)
967 ims = t;
968 else if(name == atomIfUnmodifiedSince)
969 inms = t;
970 else if(name == atomXPolipoDate)
971 polipo_age = t;
972 else if(name == atomXPolipoAccess)
973 polipo_access = t;
974 } else if(name == atomAge) {
975 j = skipWhitespace(buf, value_start);
976 if(j < 0) {
977 age = -1;
978 } else {
979 errno = 0;
980 age = strtol(buf + value_start, &endptr, 10);
981 if(errno == ERANGE || endptr <= buf + value_start)
982 age = -1;
984 if(age < 0) {
985 do_log(L_WARN, "Couldn't parse age: \n");
986 do_log_n(L_WARN, buf + value_start, value_end - value_start);
987 do_log(L_WARN, " -- ignored.\n");
989 } else if(name == atomXPolipoBodyOffset) {
990 j = skipWhitespace(buf, value_start);
991 if(j < 0) {
992 do_log(L_ERROR, "Couldn't parse body offset.\n");
993 goto fail;
994 } else {
995 errno = 0;
996 polipo_body_offset = strtol(buf + value_start, &endptr, 10);
997 if(errno == ERANGE || endptr <= buf + value_start) {
998 do_log(L_ERROR, "Couldn't parse body offset.\n");
999 goto fail;
1002 } else if(name == atomTransferEncoding) {
1003 if(token_compare(buf, value_start, value_end, "identity"))
1004 te = TE_IDENTITY;
1005 else if(token_compare(buf, value_start, value_end, "chunked"))
1006 te = TE_CHUNKED;
1007 else
1008 te = TE_UNKNOWN;
1009 } else if(name == atomETag ||
1010 name == atomIfNoneMatch || name == atomIfMatch ||
1011 name == atomIfRange) {
1012 int x, y;
1013 int weak;
1014 char *e;
1015 j = getNextETag(buf, value_start, &x, &y, &weak);
1016 if(j < 0) {
1017 if(buf[value_start] != '\r' && buf[value_start] != '\n')
1018 do_log(L_ERROR, "Couldn't parse ETag.\n");
1019 } else if(weak) {
1020 do_log(L_WARN, "Server returned weak ETag -- ignored.\n");
1021 } else {
1022 e = strdup_n(buf + x, y - x);
1023 if(e == NULL) goto fail;
1024 if(name == atomETag) {
1025 if(!etag)
1026 etag = e;
1027 else
1028 free(e);
1029 } else if(name == atomIfNoneMatch) {
1030 if(!inm)
1031 inm = e;
1032 else
1033 free(e);
1034 } else if(name == atomIfMatch) {
1035 if(!im)
1036 im = e;
1037 else
1038 free(e);
1039 } else if(name == atomIfRange) {
1040 if(!ifrange)
1041 ifrange = e;
1042 else
1043 free(e);
1044 } else {
1045 abort();
1048 } else if(name == atomCacheControl) {
1049 int v_start, v_end;
1050 j = getNextTokenInList(buf, value_start,
1051 &token_start, &token_end,
1052 &v_start, &v_end,
1053 &end);
1054 while(1) {
1055 if(j < 0) {
1056 do_log(L_WARN, "Couldn't parse Cache-Control.\n");
1057 cache_control.flags |= CACHE_NO;
1058 break;
1060 if(token_compare(buf, token_start, token_end, "no-cache")) {
1061 cache_control.flags |= CACHE_NO;
1062 } else if(token_compare(buf, token_start, token_end,
1063 "public")) {
1064 cache_control.flags |= CACHE_PUBLIC;
1065 } else if(token_compare(buf, token_start, token_end,
1066 "private")) {
1067 cache_control.flags |= CACHE_PRIVATE;
1068 } else if(token_compare(buf, token_start, token_end,
1069 "no-store")) {
1070 cache_control.flags |= CACHE_NO_STORE;
1071 } else if(token_compare(buf, token_start, token_end,
1072 "no-transform")) {
1073 cache_control.flags |= CACHE_NO_TRANSFORM;
1074 } else if(token_compare(buf, token_start, token_end,
1075 "must-revalidate") ||
1076 token_compare(buf, token_start, token_end,
1077 "must-validate")) { /* losers */
1078 cache_control.flags |= CACHE_MUST_REVALIDATE;
1079 } else if(token_compare(buf, token_start, token_end,
1080 "proxy-revalidate")) {
1081 cache_control.flags |= CACHE_PROXY_REVALIDATE;
1082 } else if(token_compare(buf, token_start, token_end,
1083 "only-if-cached")) {
1084 cache_control.flags |= CACHE_ONLY_IF_CACHED;
1085 } else if(token_compare(buf, token_start, token_end,
1086 "max-age") ||
1087 token_compare(buf, token_start, token_end,
1088 "maxage")) { /* losers */
1089 int a;
1090 if(v_start <= 0 || !digit(buf[v_start])) {
1091 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1092 do_log_n(L_WARN, buf + token_start,
1093 (v_end >= 0 ? v_end : token_end) -
1094 token_start);
1095 do_log(L_WARN, "\n");
1097 a = atoi(buf + v_start);
1098 cache_control.max_age = a;
1099 } else if(token_compare(buf, token_start, token_end,
1100 "s-maxage")) {
1101 int a;
1102 if(v_start <= 0 || !digit(buf[v_start])) {
1103 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1104 do_log_n(L_WARN, buf + token_start,
1105 (v_end >= 0 ? v_end : token_end) -
1106 token_start);
1107 do_log(L_WARN, "\n");
1109 a = atoi(buf + v_start);
1110 cache_control.max_age = a;
1111 } else if(token_compare(buf, token_start, token_end,
1112 "min-fresh")) {
1113 int a;
1114 if(v_start <= 0 || !digit(buf[v_start])) {
1115 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1116 do_log_n(L_WARN, buf + token_start,
1117 (v_end >= 0 ? v_end : token_end) -
1118 token_start);
1119 do_log(L_WARN, "\n");
1121 a = atoi(buf + v_start);
1122 cache_control.max_age = a;
1123 } else if(token_compare(buf, token_start, token_end,
1124 "max-stale")) {
1125 int a;
1126 if(v_start <= 0 || !digit(buf[v_start])) {
1127 do_log(L_WARN, "Couldn't parse Cache-Control: ");
1128 do_log_n(L_WARN, buf + token_start,
1129 (v_end >= 0 ? v_end : token_end) -
1130 token_start);
1131 do_log(L_WARN, "\n");
1133 a = atoi(buf + v_start);
1134 cache_control.max_stale = a;
1135 } else {
1136 do_log(L_WARN, "Unsupported Cache-Control directive ");
1137 do_log_n(L_WARN, buf + token_start,
1138 (v_end >= 0 ? v_end : token_end) - token_start);
1139 do_log(L_WARN, " -- ignored.\n");
1141 if(end)
1142 break;
1143 j = getNextTokenInList(buf, j,
1144 &token_start, &token_end,
1145 &v_start, &v_end,
1146 &end);
1148 } else if(name == atomContentRange) {
1149 if(!client) {
1150 j = parseContentRange(buf, value_start,
1151 &content_range.from, &content_range.to,
1152 &content_range.full_length);
1153 if(j < 0) {
1154 do_log(L_ERROR, "Couldn't parse Content-Range: ");
1155 do_log_n(L_ERROR, buf + value_start,
1156 value_end - value_start);
1157 do_log(L_ERROR, "\n");
1158 goto fail;
1160 } else {
1161 do_log(L_ERROR, "Content-Range from client.\n");
1162 goto fail;
1164 } else if(name == atomRange) {
1165 if(client) {
1166 j = parseRange(buf, value_start, &range.from, &range.to);
1167 if(j < 0) {
1168 do_log(L_WARN, "Couldn't parse Range -- ignored.\n");
1169 range.from = -1;
1170 range.to = -1;
1172 } else {
1173 do_log(L_WARN, "Range from server -- ignored\n");
1175 } else if(name == atomXPolipoLocation) {
1176 if(location_return) {
1177 location =
1178 strdup_n(buf + value_start, value_end - value_start);
1179 if(location == NULL) {
1180 do_log(L_ERROR, "Couldn't allocate location.\n");
1181 goto fail;
1184 } else if(name == atomVia) {
1185 if(via_return) {
1186 AtomPtr new_via, full_via;
1187 new_via =
1188 internAtomN(buf + value_start, value_end - value_start);
1189 if(new_via == NULL) {
1190 do_log(L_ERROR, "Couldn't allocate via.\n");
1191 goto fail;
1193 if(via) {
1194 full_via =
1195 internAtomF("%s, %s", via->string, new_via->string);
1196 releaseAtom(new_via);
1197 if(full_via == NULL) {
1198 do_log(L_ERROR, "Couldn't allocate via");
1199 goto fail;
1201 releaseAtom(via);
1202 via = full_via;
1203 } else {
1204 via = new_via;
1207 } else if(name == atomExpect) {
1208 if(expect_return) {
1209 expect = internAtomLowerN(buf + value_start,
1210 value_end - value_start);
1211 if(expect == NULL) {
1212 do_log(L_ERROR, "Couldn't allocate expect.\n");
1213 goto fail;
1216 } else {
1217 if(!client && name == atomContentType) {
1218 if(token_compare(buf, value_start, value_end,
1219 "multipart/byteranges")) {
1220 do_log(L_ERROR,
1221 "Server returned multipart/byteranges -- yuck!\n");
1222 goto fail;
1225 if(name == atomVary) {
1226 if(!token_compare(buf, value_start, value_end, "host") &&
1227 !token_compare(buf, value_start, value_end, "*")) {
1228 /* What other vary headers should be ignored? */
1229 do_log(L_VARY, "Vary header present (");
1230 do_log_n(L_VARY,
1231 buf + value_start, value_end - value_start);
1232 do_log(L_VARY, ").\n");
1234 cache_control.flags |= CACHE_VARY;
1235 } else if(name == atomAuthorization) {
1236 cache_control.flags |= CACHE_AUTHORIZATION;
1239 if(name == atomPragma) {
1240 /* Pragma is only defined for the client, and the only
1241 standard value is no-cache (RFC 1945, 10.12).
1242 However, we honour a Pragma: no-cache for both the client
1243 and the server when there's no Cache-Control header. In
1244 all cases, we pass the Pragma header to the next hop. */
1245 if(!haveCacheControl) {
1246 j = getNextTokenInList(buf, value_start,
1247 &token_start, &token_end, NULL, NULL,
1248 &end);
1249 while(1) {
1250 if(j < 0) {
1251 do_log(L_WARN, "Couldn't parse Pragma.\n");
1252 cache_control.flags |= CACHE_NO;
1253 break;
1255 if(token_compare(buf, token_start, token_end,
1256 "no-cache"))
1257 cache_control.flags = CACHE_NO;
1258 if(end)
1259 break;
1260 j = getNextTokenInList(buf, j, &token_start, &token_end,
1261 NULL, NULL, &end);
1265 if(!client &&
1266 (name == atomSetCookie ||
1267 name == atomCookie || name == atomCookie2))
1268 cache_control.flags |= CACHE_COOKIE;
1270 if(hbuf) {
1271 if(name != atomConnection && name != atomHost &&
1272 name != atomAcceptRange && name != atomTE &&
1273 name != atomProxyAuthenticate &&
1274 name != atomKeepAlive &&
1275 (!hopToHop || !atomListMember(name, hopToHop)) &&
1276 !atomListMember(name, censoredHeaders)) {
1277 int h;
1278 while(hbuf_length > hbuf_size - 2)
1279 RESIZE_HBUF();
1280 hbuf[hbuf_length++] = '\r';
1281 hbuf[hbuf_length++] = '\n';
1282 do {
1283 h = snnprint_n(hbuf, hbuf_length, hbuf_size,
1284 buf + name_start,
1285 value_end - name_start);
1286 if(h < 0) RESIZE_HBUF();
1287 } while(h < 0);
1288 hbuf_length = h;
1292 releaseAtom(name);
1293 name = NULL;
1296 if(headers_return) {
1297 AtomPtr pheaders = NULL;
1298 pheaders = internAtomN(hbuf, hbuf_length);
1299 if(!pheaders)
1300 goto fail;
1301 *headers_return = pheaders;
1303 if(hbuf != hbuf_small)
1304 free(hbuf);
1305 hbuf = NULL;
1306 hbuf_size = 0;
1308 if(request)
1309 if(!persistent)
1310 request->flags &= ~REQUEST_PERSISTENT;
1312 if(te != TE_IDENTITY) len = -1;
1313 if(len_return) *len_return = len;
1314 if(cache_control_return) *cache_control_return = cache_control;
1315 if(condition_return) {
1316 if(ims >= 0 || inms >= 0 || im || inm || ifrange) {
1317 condition = httpMakeCondition();
1318 if(condition) {
1319 condition->ims = ims;
1320 condition->inms = inms;
1321 condition->im = im;
1322 condition->inm = inm;
1323 condition->ifrange = ifrange;
1324 } else {
1325 do_log(L_ERROR, "Couldn't allocate condition.\n");
1326 if(im) free(im);
1327 if(inm) free(inm);
1329 } else {
1330 condition = NULL;
1332 *condition_return = condition;
1333 } else {
1334 assert(!im && !inm);
1337 if(te_return) *te_return = te;
1338 if(date_return) *date_return = date;
1339 if(last_modified_return) *last_modified_return = last_modified;
1340 if(expires_return) *expires_return = expires;
1341 if(polipo_age_return) *polipo_age_return = polipo_age;
1342 if(polipo_access_return) *polipo_access_return = polipo_access;
1343 if(polipo_body_offset_return)
1344 *polipo_body_offset_return = polipo_body_offset;
1345 if(age_return) *age_return = age;
1346 if(etag_return)
1347 *etag_return = etag;
1348 else {
1349 if(etag) free(etag);
1351 if(range_return) *range_return = range;
1352 if(content_range_return) *content_range_return = content_range;
1353 if(location_return) {
1354 *location_return = location;
1355 } else {
1356 if(location)
1357 free(location);
1359 if(via_return)
1360 *via_return = via;
1361 else {
1362 if(via)
1363 releaseAtom(via);
1365 if(expect_return)
1366 *expect_return = expect;
1367 else {
1368 if(expect)
1369 releaseAtom(expect);
1371 if(auth_return)
1372 *auth_return = auth;
1373 else {
1374 if(auth)
1375 releaseAtom(auth);
1377 if(hopToHop) destroyAtomList(hopToHop);
1378 return i;
1380 fail:
1381 if(hbuf && hbuf != hbuf_small) free(hbuf);
1382 if(name) releaseAtom(name);
1383 if(etag) free(etag);
1384 if(location) free(location);
1385 if(via) releaseAtom(via);
1386 if(expect) releaseAtom(expect);
1387 if(auth) releaseAtom(auth);
1388 if(hopToHop) destroyAtomList(hopToHop);
1390 return -1;
1391 #undef RESIZE_HBUF
1395 httpFindHeader(AtomPtr header, const char *headers, int hlen,
1396 int *value_begin_return, int *value_end_return)
1398 int len = header->length;
1399 int i = 0;
1401 while(i + len + 1 < hlen) {
1402 if(headers[i + len] == ':' &&
1403 lwrcmp(headers + i, header->string, len) == 0) {
1404 int j = i + len + 1, k;
1405 while(j < hlen && headers[j] == ' ')
1406 j++;
1407 k = j;
1408 while(k < hlen && headers[k] != '\n' && headers[k] != '\r')
1409 k++;
1410 *value_begin_return = j;
1411 *value_end_return = k;
1412 return 1;
1413 } else {
1414 while(i < hlen && headers[i] != '\n' && headers[i] != '\r')
1415 i++;
1416 i++;
1417 if(i < hlen && headers[i] == '\n')
1418 i++;
1421 return 0;
/* Split the first len bytes of url into host, port and remainder.

   For a URL starting with "http://" (compared with lwrcmp):
     *x_return    is 7, the offset just past the scheme;
     *y_return    is the offset where the host ends.  For a bracketed
                  RFC 2732 literal this is just past the closing ']',
                  or at the first character that is not a colon, letter
                  or digit;
     *port_return is the number parsed by atoi_n after a ':' following
                  the host, or 80 when there is no port or atoi_n fails;
     *z_return    is the offset just past whatever was consumed.

   For anything else, *x_return and *y_return are -1, *port_return is -1
   and *z_return is 0.

   Always returns 0. */
int
parseUrl(const char *url, int len,
         int *x_return, int *y_return, int *port_return, int *z_return)
{
    int host_start = -1, host_end = -1;
    int port = -1;
    int pos = 0;

    if(len >= 7 && lwrcmp(url, "http://", 7) == 0) {
        host_start = 7;

        if(host_start < len && url[host_start] == '[') {
            /* RFC 2732 */
            pos = host_start + 1;
            while(pos < len) {
                if(url[pos] == ']') {
                    /* The closing bracket belongs to the host. */
                    pos++;
                    break;
                }
                if(url[pos] != ':' && !letter(url[pos]) && !digit(url[pos]))
                    break;
                pos++;
            }
        } else {
            pos = host_start;
            while(pos < len && url[pos] != ':' && url[pos] != '/')
                pos++;
        }
        host_end = pos;

        if(pos < len && url[pos] == ':') {
            int after_port = atoi_n(url, pos + 1, len, &port);
            if(after_port >= 0) {
                pos = after_port;
            } else {
                /* Unparseable port: fall back to the default and leave
                   pos on the colon. */
                port = 80;
            }
        } else {
            port = 80;
        }
    }

    *x_return = host_start;
    *y_return = host_end;
    *port_return = port;
    *z_return = pos;
    return 0;
}
/* Return 1 if the first len bytes of url form a non-empty string that
   begins with '/', and 0 otherwise. */
int
urlIsLocal(const char *url, int len)
{
    if(len <= 0)
        return 0;
    return url[0] == '/';
}
/* Return 1 if the first len bytes of url begin with the eight-byte
   prefix "/polipo/" (exact, case-sensitive byte comparison), and 0
   otherwise. */
int
urlIsSpecial(const char *url, int len)
{
    static const char prefix[] = "/polipo/";
    const int prefix_len = (int)sizeof(prefix) - 1;

    if(len < prefix_len)
        return 0;
    return memcmp(url, prefix, prefix_len) == 0;
}
1487 parseChunkSize(const char *restrict buf, int i, int end,
1488 int *chunk_size_return)
1490 int v, d;
1491 v = h2i(buf[i]);
1492 if(v < 0)
1493 return -1;
1495 i++;
1497 while(i < end) {
1498 d = h2i(buf[i]);
1499 if(d < 0)
1500 break;
1501 v = v * 16 + d;
1502 i++;
1505 while(i < end) {
1506 if(buf[i] == ' ' || buf[i] == '\t')
1507 i++;
1508 else
1509 break;
1512 if(i >= end - 1)
1513 return 0;
1515 if(buf[i] != '\r' || buf[i + 1] != '\n')
1516 return -1;
1518 i += 2;
1520 if(v == 0) {
1521 if(i >= end - 1)
1522 return 0;
1523 if(buf[i] != '\r') {
1524 do_log(L_ERROR, "Trailers present!\n");
1525 return -1;
1527 i++;
1528 if(buf[i] != '\n')
1529 return -1;
1530 i++;
1533 *chunk_size_return = v;
1534 return i;
1539 checkVia(AtomPtr name, AtomPtr via)
1541 int i;
1542 char *v;
1543 if(via == NULL || via->length == 0)
1544 return 1;
1546 v = via->string;
1548 i = 0;
1549 while(i < via->length) {
1550 while(v[i] == ' ' || v[i] == '\t' || v[i] == ',' ||
1551 v[i] == '\r' || v[i] == '\n' ||
1552 digit(v[i]) || v[i] == '.')
1553 i++;
1554 if(i + name->length > via->length)
1555 break;
1556 if(memcmp(v + i, name->string, name->length) == 0) {
1557 char c = v[i + name->length];
1558 if(c == '\0' || c == ' ' || c == '\t' || c == ',' ||
1559 c == '\r' || c == '\n')
1560 return 0;
1562 i++;
1563 while(letter(v[i]) || digit(v[i]) || v[i] == '.')
1564 i++;
1566 return 1;