release.sh: restore -jJAILDIR option
[minix.git] / lib / libfetch / http.c
bloba60ac86c0078c80632dd0f255d6a835314d43ebb
1 /* $NetBSD: http.c,v 1.29 2010/01/24 19:10:35 joerg Exp $ */
2 /*-
3 * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
4 * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org>
5 * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org>
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer
13 * in this position and unchanged.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $
35 * The following copyright applies to the base64 code:
38 * Copyright 1997 Massachusetts Institute of Technology
40 * Permission to use, copy, modify, and distribute this software and
41 * its documentation for any purpose and without fee is hereby
42 * granted, provided that both the above copyright notice and this
43 * permission notice appear in all copies, that both the above
44 * copyright notice and this permission notice appear in all
45 * supporting documentation, and that the name of M.I.T. not be used
46 * in advertising or publicity pertaining to distribution of the
47 * software without specific, written prior permission. M.I.T. makes
48 * no representations about the suitability of this software for any
49 * purpose. It is provided "as is" without express or implied
50 * warranty.
52 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
53 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
54 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
56 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
59 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
60 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
61 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
62 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
66 #if defined(__linux__) || defined(__MINT__)
67 /* Keep this down to Linux or MiNT, it can create surprises elsewhere. */
68 #define _GNU_SOURCE
69 #endif
71 /* Needed for gmtime_r on Interix */
72 #define _REENTRANT
74 #if HAVE_CONFIG_H
75 #include "config.h"
76 #endif
77 #if !defined(NETBSD) && !defined(__minix)
78 #include <nbcompat.h>
79 #endif
81 #include <sys/types.h>
82 #include <sys/socket.h>
84 #include <ctype.h>
85 #include <errno.h>
86 #include <locale.h>
87 #include <stdarg.h>
88 #if !defined(NETBSD) && !defined(__minix)
89 #include <nbcompat/stdio.h>
90 #else
91 #include <stdio.h>
92 #endif
93 #include <stdlib.h>
94 #include <string.h>
95 #include <time.h>
96 #include <unistd.h>
98 #include <netinet/in.h>
99 #include <netinet/tcp.h>
101 #if !defined(NETBSD) && !defined(__minix)
102 #include <nbcompat/netdb.h>
103 #else
104 #include <netdb.h>
105 #endif
107 #include <arpa/inet.h>
109 #include "common.h"
110 #include "httperr.h"
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5

/* Symbolic names for reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NOT_MODIFIED	304
#define HTTP_TEMP_REDIRECT	307
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
#define HTTP_PROTOCOL_ERROR	999

/* True if the reply code is one of the redirects we follow */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_TEMP_REDIRECT \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True for every client (4xx) and server (5xx) error reply.  The bounds
 * are inclusive so that 400 and 599 are classified as errors as well.
 */
#define HTTP_ERROR(xyz) ((xyz) >= 400 && (xyz) <= 599)

/* Size of the fixed buffers used where no dynamic allocation is done */
#define MINBUFSIZE 4096
137 /*****************************************************************************
138 * I/O functions for decoding chunked streams
141 struct httpio
143 conn_t *conn; /* connection */
144 int chunked; /* chunked mode */
145 int keep_alive; /* keep-alive mode */
146 char *buf; /* chunk buffer */
147 size_t bufsize; /* size of chunk buffer */
148 ssize_t buflen; /* amount of data currently in buffer */
149 int bufpos; /* current read offset in buffer */
150 int eof; /* end-of-file flag */
151 int error; /* error flag */
152 size_t chunksize; /* remaining size of current chunk */
153 off_t contentlength; /* remaining size of the content */
157 * Get next chunk header
159 static int
160 http_new_chunk(struct httpio *io)
162 char *p;
164 if (fetch_getln(io->conn) == -1)
165 return (-1);
167 if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
168 return (-1);
170 for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
171 if (*p == ';')
172 break;
173 if (!isxdigit((unsigned char)*p))
174 return (-1);
175 if (isdigit((unsigned char)*p)) {
176 io->chunksize = io->chunksize * 16 +
177 *p - '0';
178 } else {
179 io->chunksize = io->chunksize * 16 +
180 10 + tolower((unsigned char)*p) - 'a';
184 return (io->chunksize);
188 * Grow the input buffer to at least len bytes
190 static int
191 http_growbuf(struct httpio *io, size_t len)
193 char *tmp;
195 if (io->bufsize >= len)
196 return (0);
198 if ((tmp = realloc(io->buf, len)) == NULL)
199 return (-1);
200 io->buf = tmp;
201 io->bufsize = len;
202 return (0);
206 * Fill the input buffer, do chunk decoding on the fly
208 static int
209 http_fillbuf(struct httpio *io, size_t len)
211 if (io->error)
212 return (-1);
213 if (io->eof)
214 return (0);
216 if (io->contentlength >= 0 && (off_t)len > io->contentlength)
217 len = io->contentlength;
219 if (io->chunked == 0) {
220 if (http_growbuf(io, len) == -1)
221 return (-1);
222 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
223 io->error = 1;
224 return (-1);
226 if (io->contentlength)
227 io->contentlength -= io->buflen;
228 io->bufpos = 0;
229 return (io->buflen);
232 if (io->chunksize == 0) {
233 switch (http_new_chunk(io)) {
234 case -1:
235 io->error = 1;
236 return (-1);
237 case 0:
238 io->eof = 1;
239 if (fetch_getln(io->conn) == -1)
240 return (-1);
241 return (0);
245 if (len > io->chunksize)
246 len = io->chunksize;
247 if (http_growbuf(io, len) == -1)
248 return (-1);
249 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
250 io->error = 1;
251 return (-1);
253 io->chunksize -= io->buflen;
254 if (io->contentlength >= 0)
255 io->contentlength -= io->buflen;
257 if (io->chunksize == 0) {
258 char endl[2];
259 ssize_t len2;
261 len2 = fetch_read(io->conn, endl, 2);
262 if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1)
263 return (-1);
264 if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n')
265 return (-1);
268 io->bufpos = 0;
270 return (io->buflen);
274 * Read function
276 static ssize_t
277 http_readfn(void *v, void *buf, size_t len)
279 struct httpio *io = (struct httpio *)v;
280 size_t l, pos;
282 if (io->error)
283 return (-1);
284 if (io->eof)
285 return (0);
287 for (pos = 0; len > 0; pos += l, len -= l) {
288 /* empty buffer */
289 if (!io->buf || io->bufpos == io->buflen)
290 if (http_fillbuf(io, len) < 1)
291 break;
292 l = io->buflen - io->bufpos;
293 if (len < l)
294 l = len;
295 memcpy((char *)buf + pos, io->buf + io->bufpos, l);
296 io->bufpos += l;
299 if (!pos && io->error)
300 return (-1);
301 return (pos);
305 * Write function
307 static ssize_t
308 http_writefn(void *v, const void *buf, size_t len)
310 struct httpio *io = (struct httpio *)v;
312 return (fetch_write(io->conn, buf, len));
316 * Close function
318 static void
319 http_closefn(void *v)
321 struct httpio *io = (struct httpio *)v;
323 if (io->keep_alive) {
324 int val;
326 val = 0;
327 setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
328 sizeof(val));
329 fetch_cache_put(io->conn, fetch_close);
330 #ifdef TCP_NOPUSH
331 val = 1;
332 setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
333 sizeof(val));
334 #endif
335 } else {
336 fetch_close(io->conn);
339 free(io->buf);
340 free(io);
344 * Wrap a file descriptor up
346 static fetchIO *
347 http_funopen(conn_t *conn, int chunked, int keep_alive, off_t clength)
349 struct httpio *io;
350 fetchIO *f;
352 if ((io = calloc(1, sizeof(*io))) == NULL) {
353 fetch_syserr();
354 return (NULL);
356 io->conn = conn;
357 io->chunked = chunked;
358 io->contentlength = clength;
359 io->keep_alive = keep_alive;
360 f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn);
361 if (f == NULL) {
362 fetch_syserr();
363 free(io);
364 return (NULL);
366 return (f);
/*****************************************************************************
 * Helper functions for talking to the server and parsing its replies
 */

/*
 * Header types.  Values at or below hdr_end stop the header loop;
 * everything above it identifies a header line that was read.
 */
typedef enum {
	hdr_syserror		= -2,
	hdr_error		= -1,
	hdr_end			= 0,
	hdr_unknown		= 1,
	hdr_connection		= 2,
	hdr_content_length	= 3,
	hdr_content_range	= 4,
	hdr_last_modified	= 5,
	hdr_location		= 6,
	hdr_transfer_encoding	= 7,
	hdr_www_authenticate	= 8
} hdr_t;
389 /* Names of interesting headers */
390 static struct {
391 hdr_t num;
392 const char *name;
393 } hdr_names[] = {
394 { hdr_connection, "Connection" },
395 { hdr_content_length, "Content-Length" },
396 { hdr_content_range, "Content-Range" },
397 { hdr_last_modified, "Last-Modified" },
398 { hdr_location, "Location" },
399 { hdr_transfer_encoding, "Transfer-Encoding" },
400 { hdr_www_authenticate, "WWW-Authenticate" },
401 { hdr_unknown, NULL },
405 * Send a formatted line; optionally echo to terminal
407 #ifndef __minix
408 static int
409 http_cmd(conn_t *conn, const char *fmt, ...)
411 va_list ap;
412 size_t len;
413 char *msg;
414 int r;
416 va_start(ap, fmt);
417 len = vasprintf(&msg, fmt, ap);
418 va_end(ap);
420 if (msg == NULL) {
421 errno = ENOMEM;
422 fetch_syserr();
423 return (-1);
426 r = fetch_write(conn, msg, len);
427 free(msg);
429 if (r == -1) {
430 fetch_syserr();
431 return (-1);
434 return (0);
436 #else
437 static int
438 http_cmd(conn_t *conn, const char *fmt, ...)
440 va_list ap;
441 size_t len;
442 char msg[MINBUFSIZE];
443 int r;
445 va_start(ap, fmt);
446 len = vsnprintf(&msg[0], MINBUFSIZE, fmt, ap);
447 va_end(ap);
449 if (len >= MINBUFSIZE) {
450 errno = ENOMEM;
451 fetch_syserr();
452 return (-1);
455 r = fetch_write(conn, &msg[0], len);
457 if (r == -1) {
458 fetch_syserr();
459 return (-1);
462 return (0);
464 #endif
466 * Get and parse status line
468 static int
469 http_get_reply(conn_t *conn)
471 char *p;
473 if (fetch_getln(conn) == -1)
474 return (-1);
476 * A valid status line looks like "HTTP/m.n xyz reason" where m
477 * and n are the major and minor protocol version numbers and xyz
478 * is the reply code.
479 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
480 * just one) that do not send a version number, so we can't rely
481 * on finding one, but if we do, insist on it being 1.0 or 1.1.
482 * We don't care about the reason phrase.
484 if (strncmp(conn->buf, "HTTP", 4) != 0)
485 return (HTTP_PROTOCOL_ERROR);
486 p = conn->buf + 4;
487 if (*p == '/') {
488 if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
489 return (HTTP_PROTOCOL_ERROR);
490 p += 4;
492 if (*p != ' ' ||
493 !isdigit((unsigned char)p[1]) ||
494 !isdigit((unsigned char)p[2]) ||
495 !isdigit((unsigned char)p[3]))
496 return (HTTP_PROTOCOL_ERROR);
498 conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
499 return (conn->err);
/*
 * Check a header; if the type matches the given string, return a pointer
 * to the beginning of the value.
 *
 * str is the header name to look for, hdr the complete header line.
 * The name is compared case-insensitively and must be followed directly
 * by ':'.  Returns a pointer into hdr just past the colon and any
 * whitespace after it, or NULL if the line is a different header.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Advance only past characters that actually matched, so that a
	 * mismatch on the last character of the name is not skipped over.
	 */
	while (*str != '\0' && *hdr != '\0' &&
	    tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
		++str;
		++hdr;
	}
	if (*str != '\0' || *hdr != ':')
		return (NULL);

	/* skip the colon and the whitespace before the value */
	++hdr;
	while (isspace((unsigned char)*hdr))
		++hdr;
	return (hdr);
}
520 * Get the next header and return the appropriate symbolic code.
522 static hdr_t
523 http_next_header(conn_t *conn, const char **p)
525 int i;
527 if (fetch_getln(conn) == -1)
528 return (hdr_syserror);
529 while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1]))
530 conn->buflen--;
531 conn->buf[conn->buflen] = '\0';
532 if (conn->buflen == 0)
533 return (hdr_end);
535 * We could check for malformed headers but we don't really care.
536 * A valid header starts with a token immediately followed by a
537 * colon; a token is any sequence of non-control, non-whitespace
538 * characters except "()<>@,;:\\\"{}".
540 for (i = 0; hdr_names[i].num != hdr_unknown; i++)
541 if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL)
542 return (hdr_names[i].num);
543 return (hdr_unknown);
/*
 * Parse a last-modified header
 *
 * p is the header value, expected in the form
 * "Sun, 06 Nov 1994 08:49:37 GMT".  On success the time is stored in
 * *mtime and 0 is returned; -1 is returned if the value does not parse.
 *
 * The day and month names are English, so LC_TIME is switched to "C"
 * while parsing and restored afterwards.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	char saved[64];
	const char *cur;
	char *r;
	struct tm tm;
	size_t n;
	int switched;

	/*
	 * Switch locale only if the current one can be saved completely
	 * (and therefore restored); otherwise parse under the current one.
	 */
	switched = 0;
	cur = setlocale(LC_TIME, NULL);
	if (cur != NULL && (n = strlen(cur)) < sizeof(saved)) {
		memcpy(saved, cur, n + 1);
		setlocale(LC_TIME, "C");
		switched = 1;
	}

	/* strptime() only fills in the fields named by the format */
	memset(&tm, 0, sizeof(tm));
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/* XXX should add support for date-2 and date-3 */
	if (switched)
		setlocale(LC_TIME, saved);
	if (r == NULL)
		return (-1);
	*mtime = timegm(&tm);
	return (0);
}
/*
 * Parse a content-length header
 *
 * p is the header value, which must consist of decimal digits only.
 * On success the value is stored in *length and 0 is returned.  -1 is
 * returned, and *length left untouched, if the value contains anything
 * else or does not fit in an off_t.  An empty value parses as 0.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	/* largest off_t; there is no portable macro for it */
	const off_t off_max =
	    (off_t)((((unsigned long long)1) << (sizeof(off_t) * 8 - 1)) - 1);
	off_t len;
	int digit;

	for (len = 0; *p && isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		/* the value comes from the server: never let it overflow */
		if (len > (off_max - digit) / 10)
			return (-1);
		len = len * 10 + digit;
	}
	if (*p)
		return (-1);
	*length = len;
	return (0);
}
/*
 * Read a run of decimal digits at *pp into *out and advance *pp past
 * it.  An empty run yields 0.  Returns -1 if the value would not fit in
 * an off_t.
 */
static int
http_range_number(const char **pp, off_t *out)
{
	/* largest off_t; there is no portable macro for it */
	const off_t off_max =
	    (off_t)((((unsigned long long)1) << (sizeof(off_t) * 8 - 1)) - 1);
	const char *s;
	off_t val;
	int digit;

	val = 0;
	for (s = *pp; isdigit((unsigned char)*s); ++s) {
		digit = *s - '0';
		if (val > (off_max - digit) / 10)
			return (-1);
		val = val * 10 + digit;
	}
	*pp = s;
	*out = val;
	return (0);
}

/*
 * Parse a content-range header
 *
 * p is the header value, "bytes first-last/size" or "bytes * /size"
 * (without the space).  On success *offset receives first (-1 for the
 * "*" form), *length the number of bytes in the range (0 for the "*"
 * form) and *size the total size, and 0 is returned.  -1 is returned,
 * and nothing is stored, if the value is malformed, inconsistent or
 * contains a number that does not fit in an off_t.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (*p == '*') {
		first = last = -1;
		++p;
	} else {
		if (http_range_number(&p, &first) == -1 || *p != '-')
			return (-1);
		++p;
		if (http_range_number(&p, &last) == -1)
			return (-1);
	}
	if (first > last || *p != '/')
		return (-1);
	++p;
	if (http_range_number(&p, &len) == -1)
		return (-1);
	/*
	 * Same test as "len < last - first + 1", written so that the
	 * addition cannot overflow.
	 */
	if (*p || len <= last - first)
		return (-1);
	if (first == -1)
		*length = 0;
	else
		*length = last - first + 1;
	*offset = first;
	*size = len;
	return (0);
}
/*****************************************************************************
 * Helper functions for authorization
 */

/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src and returns the result as a
 * newly allocated NUL-terminated string, padded with '=' to a multiple
 * of four characters.  The caller frees it.  Returns NULL if memory
 * cannot be allocated.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/*
	 * Work on unsigned bytes: where plain char is signed, a byte of
	 * 0x80 or above would be sign-extended by the shifts below and
	 * spill into the neighbouring 6-bit groups.
	 */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	/* full groups: three input bytes become four output characters */
	while (l >= 3) {
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) |
		    in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3; l -= 3;
		dst += 4;
	}

	/* trailing one or two bytes are padded with '=' */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	case 0:
		break;
	}

	*dst = 0;
	return (str);
}
681 * Encode username and password
683 #ifndef __minix
684 static int
685 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
687 char *upw, *auth;
688 int r;
690 if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
691 return (-1);
692 auth = http_base64(upw);
693 free(upw);
694 if (auth == NULL)
695 return (-1);
696 r = http_cmd(conn, "%s: Basic %s\r\n", hdr, auth);
697 free(auth);
698 return (r);
700 #else
701 static int
702 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
704 char upw[MINBUFSIZE], *auth;
705 int len, r;
707 len = snprintf(&upw[0], MINBUFSIZE, "%s:%s", usr, pwd);
708 if (len >= MINBUFSIZE)
709 return -1;
710 auth = http_base64(&upw[0]);
711 if (auth == NULL)
712 return (-1);
713 r = http_cmd(conn, "%s: Basic %s\r\n", hdr, auth);
714 free(auth);
715 return (r);
717 #endif
719 * Send an authorization header
721 static int
722 http_authorize(conn_t *conn, const char *hdr, const char *p)
724 /* basic authorization */
725 if (strncasecmp(p, "basic:", 6) == 0) {
726 char *user, *pwd, *str;
727 int r;
729 /* skip realm */
730 for (p += 6; *p && *p != ':'; ++p)
731 /* nothing */ ;
732 if (!*p || strchr(++p, ':') == NULL)
733 return (-1);
734 if ((str = strdup(p)) == NULL)
735 return (-1); /* XXX */
736 user = str;
737 pwd = strchr(str, ':');
738 *pwd++ = '\0';
739 r = http_basic_auth(conn, hdr, user, pwd);
740 free(str);
741 return (r);
743 return (-1);
747 /*****************************************************************************
748 * Helper functions for connecting to a server or proxy
752 * Connect to the correct HTTP server or proxy.
754 static conn_t *
755 http_connect(struct url *URL, struct url *purl, const char *flags, int *cached)
757 conn_t *conn;
758 int af, verbose;
759 #ifdef TCP_NOPUSH
760 int val;
761 #endif
763 *cached = 1;
765 #ifdef INET6
766 af = AF_UNSPEC;
767 #else
768 af = AF_INET;
769 #endif
771 verbose = CHECK_FLAG('v');
772 if (CHECK_FLAG('4'))
773 af = AF_INET;
774 #ifdef INET6
775 else if (CHECK_FLAG('6'))
776 af = AF_INET6;
777 #endif
779 if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
780 URL = purl;
781 } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
782 /* can't talk http to an ftp server */
783 /* XXX should set an error code */
784 return (NULL);
787 if ((conn = fetch_cache_get(URL, af)) != NULL) {
788 *cached = 1;
789 return (conn);
792 if ((conn = fetch_connect(URL, af, verbose)) == NULL)
793 /* fetch_connect() has already set an error code */
794 return (NULL);
795 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
796 fetch_ssl(conn, verbose) == -1) {
797 fetch_close(conn);
798 /* grrr */
799 #ifdef EAUTH
800 errno = EAUTH;
801 #else
802 errno = EPERM;
803 #endif
804 fetch_syserr();
805 return (NULL);
808 #ifdef TCP_NOPUSH
809 val = 1;
810 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
811 #endif
813 return (conn);
816 static struct url *
817 http_get_proxy(struct url * url, const char *flags)
819 struct url *purl;
820 char *p;
822 if (flags != NULL && strchr(flags, 'd') != NULL)
823 return (NULL);
824 if (fetch_no_proxy_match(url->host))
825 return (NULL);
826 if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
827 *p && (purl = fetchParseURL(p))) {
828 if (!*purl->scheme)
829 strcpy(purl->scheme, SCHEME_HTTP);
830 if (!purl->port)
831 purl->port = fetch_default_proxy_port(purl->scheme);
832 if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
833 return (purl);
834 fetchFreeURL(purl);
836 return (NULL);
839 static void
840 set_if_modified_since(conn_t *conn, time_t last_modified)
842 static const char weekdays[] = "SunMonTueWedThuFriSat";
843 static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
844 struct tm tm;
845 char buf[80];
846 gmtime_r(&last_modified, &tm);
847 snprintf(buf, sizeof(buf), "%.3s, %02d %.3s %4d %02d:%02d:%02d GMT",
848 weekdays + tm.tm_wday * 3, tm.tm_mday, months + tm.tm_mon * 3,
849 tm.tm_year + 1900, tm.tm_hour, tm.tm_min, tm.tm_sec);
850 http_cmd(conn, "If-Modified-Since: %s\r\n", buf);
854 /*****************************************************************************
855 * Core
859 * Send a request and process the reply
861 * XXX This function is way too long, the do..while loop should be split
862 * XXX off into a separate function.
/*
 * http_request: send the request "op" for URL and process the reply.
 *
 * The request is repeated for redirects and after a 401 reply (then
 * with credentials), up to MAX_REDIRECT tries, or a single try when the
 * 'A' flag is set.  A redirect to an ftp URL is handed to ftp_request()
 * when no proxy is in use.
 *
 * On success a fetchIO wrapping the connection is returned, the
 * effective offset and content length are stored back into URL and, if
 * us is not NULL, its size and time fields are filled in.  NULL is
 * returned on failure, for a 304 reply and for 4xx/5xx replies.
 *
 * purl is the proxy URL or NULL; when given it is released here with
 * fetchFreeURL() before returning.
 */
864 fetchIO *
865 http_request(struct url *URL, const char *op, struct url_stat *us,
866 struct url *purl, const char *flags)
868 conn_t *conn;
869 struct url *url, *new;
870 int chunked, direct, if_modified_since, need_auth, noredirect;
871 int keep_alive, verbose, cached;
872 int e, i, n, val;
873 off_t offset, clength, length, size;
874 time_t mtime;
875 const char *p;
876 fetchIO *f;
877 hdr_t h;
878 char hbuf[URL_HOSTLEN + 7], *host;
880 direct = CHECK_FLAG('d');
881 noredirect = CHECK_FLAG('A');
882 verbose = CHECK_FLAG('v');
883 if_modified_since = CHECK_FLAG('i');
884 keep_alive = 0;
/* a direct connection was requested: drop the proxy */
886 if (direct && purl) {
887 fetchFreeURL(purl);
888 purl = NULL;
891 /* try the provided URL first */
892 url = URL;
894 /* if the A flag is set, we only get one try */
895 n = noredirect ? 1 : MAX_REDIRECT;
896 i = 0;
898 e = HTTP_PROTOCOL_ERROR;
899 need_auth = 0;
900 do {
/* per-try state; -1 means the corresponding header was not seen */
901 new = NULL;
902 chunked = 0;
903 offset = 0;
904 clength = -1;
905 length = -1;
906 size = -1;
907 mtime = 0;
909 /* check port */
910 if (!url->port)
911 url->port = fetch_default_port(url->scheme);
913 /* were we redirected to an FTP URL? */
914 if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
915 if (strcmp(op, "GET") == 0)
916 return (ftp_request(url, "RETR", NULL, us, purl, flags));
917 else if (strcmp(op, "HEAD") == 0)
918 return (ftp_request(url, "STAT", NULL, us, purl, flags));
921 /* connect to server or proxy */
922 if ((conn = http_connect(url, purl, flags, &cached)) == NULL)
923 goto ouch;
/* build the Host value: bracket IPv6 literals, append a non-default port */
925 host = url->host;
926 #ifdef INET6
927 if (strchr(url->host, ':')) {
928 snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
929 host = hbuf;
931 #endif
932 if (url->port != fetch_default_port(url->scheme)) {
933 if (host != hbuf) {
934 strcpy(hbuf, host);
935 host = hbuf;
937 snprintf(hbuf + strlen(hbuf),
938 sizeof(hbuf) - strlen(hbuf), ":%d", url->port);
941 /* send request */
942 if (verbose)
943 fetch_info("requesting %s://%s%s",
944 url->scheme, host, url->doc);
/* a proxy is given the absolute URL, a server only the document path */
945 if (purl) {
946 http_cmd(conn, "%s %s://%s%s HTTP/1.1\r\n",
947 op, url->scheme, host, url->doc);
948 } else {
949 http_cmd(conn, "%s %s HTTP/1.1\r\n",
950 op, url->doc);
953 if (if_modified_since && url->last_modified > 0)
954 set_if_modified_since(conn, url->last_modified);
956 /* virtual host */
957 http_cmd(conn, "Host: %s\r\n", host);
959 /* proxy authorization */
960 if (purl) {
961 if (*purl->user || *purl->pwd)
962 http_basic_auth(conn, "Proxy-Authorization",
963 purl->user, purl->pwd);
964 else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
965 http_authorize(conn, "Proxy-Authorization", p);
968 /* server authorization */
969 if (need_auth || *url->user || *url->pwd) {
970 if (*url->user || *url->pwd)
971 http_basic_auth(conn, "Authorization", url->user, url->pwd);
972 else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
973 http_authorize(conn, "Authorization", p);
974 else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
975 http_basic_auth(conn, "Authorization", url->user, url->pwd);
976 } else {
977 http_seterr(HTTP_NEED_AUTH);
978 goto ouch;
982 /* other headers */
983 if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
984 if (strcasecmp(p, "auto") == 0)
985 http_cmd(conn, "Referer: %s://%s%s\r\n",
986 url->scheme, host, url->doc);
987 else
988 http_cmd(conn, "Referer: %s\r\n", p);
990 if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
991 http_cmd(conn, "User-Agent: %s\r\n", p);
992 else
993 http_cmd(conn, "User-Agent: %s\r\n", _LIBFETCH_VER);
994 #ifndef __minix
995 if (url->offset > 0)
996 http_cmd(conn, "Range: bytes=%lld-\r\n", (long long)url->offset);
997 #else
998 if (url->offset > 0)
999 http_cmd(conn, "Range: bytes=%ld-\r\n", (long)url->offset);
1000 #endif
/* empty line: end of the request headers */
1001 http_cmd(conn, "\r\n");
1004 * Force the queued request to be dispatched. Normally, one
1005 * would do this with shutdown(2) but squid proxies can be
1006 * configured to disallow such half-closed connections. To
1007 * be compatible with such configurations, fiddle with socket
1008 * options to force the pending data to be written.
1010 #ifdef TCP_NOPUSH
1011 val = 0;
1012 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
1013 sizeof(val));
1014 #endif
1015 val = 1;
1016 setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
1017 sizeof(val));
1019 /* get reply */
1020 switch (http_get_reply(conn)) {
1021 case HTTP_OK:
1022 case HTTP_PARTIAL:
1023 case HTTP_NOT_MODIFIED:
1024 /* fine */
1025 break;
1026 case HTTP_MOVED_PERM:
1027 case HTTP_MOVED_TEMP:
1028 case HTTP_SEE_OTHER:
1030 * Not so fine, but we still have to read the
1031 * headers to get the new location.
1033 break;
1034 case HTTP_NEED_AUTH:
1035 if (need_auth) {
1037 * We already sent out authorization code,
1038 * so there's nothing more we can do.
1040 http_seterr(conn->err);
1041 goto ouch;
1043 /* try again, but send the password this time */
1044 if (verbose)
1045 fetch_info("server requires authorization");
1046 break;
1047 case HTTP_NEED_PROXY_AUTH:
1049 * If we're talking to a proxy, we already sent
1050 * our proxy authorization code, so there's
1051 * nothing more we can do.
1053 http_seterr(conn->err);
1054 goto ouch;
1055 case HTTP_BAD_RANGE:
1057 * This can happen if we ask for 0 bytes because
1058 * we already have the whole file. Consider this
1059 * a success for now, and check sizes later.
1061 break;
1062 case HTTP_PROTOCOL_ERROR:
1063 /* fall through */
1064 case -1:
/*
 * Undo the try counter and, for a connection that came out of the
 * cache, start this try over.
 * NOTE(review): conn is neither closed nor cleared before the
 * `continue`, so it looks like it is leaked on this path -- confirm.
 */
1065 --i;
1066 if (cached)
1067 continue;
1068 fetch_syserr();
1069 goto ouch;
1070 default:
1071 http_seterr(conn->err);
1072 if (!verbose)
1073 goto ouch;
1074 /* fall through so we can get the full error message */
1077 /* get headers */
1078 do {
1079 switch ((h = http_next_header(conn, &p))) {
1080 case hdr_syserror:
1081 fetch_syserr();
1082 goto ouch;
1083 case hdr_error:
1084 http_seterr(HTTP_PROTOCOL_ERROR);
1085 goto ouch;
1086 case hdr_connection:
1087 /* XXX too weak? */
1088 keep_alive = (strcasecmp(p, "keep-alive") == 0);
1089 break;
1090 case hdr_content_length:
1091 http_parse_length(p, &clength);
1092 break;
1093 case hdr_content_range:
1094 http_parse_range(p, &offset, &length, &size);
1095 break;
1096 case hdr_last_modified:
1097 http_parse_mtime(p, &mtime);
1098 break;
1099 case hdr_location:
1100 if (!HTTP_REDIRECT(conn->err))
1101 break;
/*
 * NOTE(review): an earlier Location target is released with free()
 * here while URLs are released with fetchFreeURL() elsewhere in this
 * function -- confirm the two are interchangeable for struct url.
 */
1102 if (new)
1103 free(new);
1104 if (verbose)
1105 fetch_info("%d redirect to %s", conn->err, p);
1106 if (*p == '/')
1107 /* absolute path */
1108 new = fetchMakeURL(url->scheme, url->host, url->port, p,
1109 url->user, url->pwd);
1110 else
1111 new = fetchParseURL(p);
1112 if (new == NULL) {
1113 /* XXX should set an error code */
1114 goto ouch;
/* carry credentials and the requested range over to the new URL */
1116 if (!*new->user && !*new->pwd) {
1117 strcpy(new->user, url->user);
1118 strcpy(new->pwd, url->pwd);
1120 new->offset = url->offset;
1121 new->length = url->length;
1122 break;
1123 case hdr_transfer_encoding:
1124 /* XXX weak test*/
1125 chunked = (strcasecmp(p, "chunked") == 0);
1126 break;
1127 case hdr_www_authenticate:
1128 if (conn->err != HTTP_NEED_AUTH)
1129 break;
1130 /* if we were smarter, we'd check the method and realm */
1131 break;
1132 case hdr_end:
1133 /* fall through */
1134 case hdr_unknown:
1135 /* ignore */
1136 break;
1138 } while (h > hdr_end);
1140 /* we need to provide authentication */
1141 if (conn->err == HTTP_NEED_AUTH) {
1142 e = conn->err;
1143 need_auth = 1;
1144 fetch_close(conn);
1145 conn = NULL;
1146 continue;
1149 /* requested range not satisfiable */
1150 if (conn->err == HTTP_BAD_RANGE) {
1151 if (url->offset == size && url->length == 0) {
1152 /* asked for 0 bytes; fake it */
1153 offset = url->offset;
1154 conn->err = HTTP_OK;
1155 break;
1156 } else {
1157 http_seterr(conn->err);
1158 goto ouch;
1162 /* we have a hit or an error */
1163 if (conn->err == HTTP_OK ||
1164 conn->err == HTTP_PARTIAL ||
1165 conn->err == HTTP_NOT_MODIFIED ||
1166 HTTP_ERROR(conn->err))
1167 break;
1169 /* all other cases: we got a redirect */
1170 e = conn->err;
1171 need_auth = 0;
1172 fetch_close(conn);
1173 conn = NULL;
1174 if (!new)
1175 break;
1176 if (url != URL)
1177 fetchFreeURL(url);
1178 url = new;
1179 } while (++i < n);
1181 /* we failed, or ran out of retries */
1182 if (conn == NULL) {
1183 http_seterr(e);
1184 goto ouch;
1187 /* check for inconsistencies */
1188 if (clength != -1 && length != -1 && clength != length) {
1189 http_seterr(HTTP_PROTOCOL_ERROR);
1190 goto ouch;
1192 if (clength == -1)
1193 clength = length;
1194 if (clength != -1)
1195 length = offset + clength;
1196 if (length != -1 && size != -1 && length != size) {
1197 http_seterr(HTTP_PROTOCOL_ERROR);
1198 goto ouch;
1200 if (size == -1)
1201 size = length;
1203 /* fill in stats */
1204 if (us) {
1205 us->size = size;
1206 us->atime = us->mtime = mtime;
1209 /* too far? */
1210 if (URL->offset > 0 && offset > URL->offset) {
1211 http_seterr(HTTP_PROTOCOL_ERROR);
1212 goto ouch;
1215 /* report back real offset and size */
1216 URL->offset = offset;
1217 URL->length = clength;
/* without a length or chunking the body only ends when the peer closes */
1219 if (clength == -1 && !chunked)
1220 keep_alive = 0;
1222 if (conn->err == HTTP_NOT_MODIFIED) {
1223 http_seterr(HTTP_NOT_MODIFIED);
1224 if (keep_alive) {
1225 fetch_cache_put(conn, fetch_close);
1226 conn = NULL;
1228 goto ouch;
1231 /* wrap it up in a fetchIO */
1232 if ((f = http_funopen(conn, chunked, keep_alive, clength)) == NULL) {
1233 fetch_syserr();
1234 goto ouch;
1237 if (url != URL)
1238 fetchFreeURL(url);
1239 if (purl)
1240 fetchFreeURL(purl);
/* error reply: read and discard the body of a keep-alive connection, then report failure */
1242 if (HTTP_ERROR(conn->err)) {
1244 if (keep_alive) {
1245 char buf[512];
1246 do {
1247 } while (fetchIO_read(f, buf, sizeof(buf)) > 0);
1250 fetchIO_close(f);
1251 f = NULL;
1254 return (f);
/* common failure exit: release whatever this function still owns */
1256 ouch:
1257 if (url != URL)
1258 fetchFreeURL(url);
1259 if (purl)
1260 fetchFreeURL(purl);
1261 if (conn != NULL)
1262 fetch_close(conn);
1263 return (NULL);
1267 /*****************************************************************************
1268 * Entry points
1272 * Retrieve and stat a file by HTTP
1274 fetchIO *
1275 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
1277 return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags));
1281 * Retrieve a file by HTTP
1283 fetchIO *
1284 fetchGetHTTP(struct url *URL, const char *flags)
1286 return (fetchXGetHTTP(URL, NULL, flags));
1290 * Store a file by HTTP
1292 fetchIO *
1293 fetchPutHTTP(struct url *URL, const char *flags)
1295 fprintf(stderr, "fetchPutHTTP(): not implemented\n");
1296 return (NULL);
1300 * Get an HTTP document's metadata
1303 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
1305 fetchIO *f;
1307 f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags);
1308 if (f == NULL)
1309 return (-1);
1310 fetchIO_close(f);
1311 return (0);
/*
 * States of the HTML index parser.  The descriptions are taken from the
 * parser's own comments where this part of the file shows them.
 */
enum http_states {
	ST_NONE		= 0,	/* plain text, not in markup */
	ST_LT		= 1,	/* in tag -- "<" already found */
	ST_LTA		= 2,	/* in tag -- "<a" already found */
	ST_TAGA		= 3,	/* in a-tag -- "<a " already found */
	ST_H		= 4,	/* in a-tag -- "<a h" already found */
	ST_R		= 5,
	ST_E		= 6,
	ST_F		= 7,
	ST_HREF		= 8,
	ST_HREFQ	= 9,
	ST_TAG		= 10,	/* in tag, but not "<a" -- disregard */
	ST_TAGAX	= 11,	/* in unknown keyword in a-tag */
	ST_TAGAQ	= 12	/* in a-tag, unknown argument for keys */
};
1330 struct index_parser {
1331 struct url_list *ue;
1332 struct url *url;
1333 enum http_states state;
1336 static ssize_t
1337 parse_index(struct index_parser *parser, const char *buf, size_t len)
1339 char *end_attr, p = *buf;
1341 switch (parser->state) {
1342 case ST_NONE:
1343 /* Plain text, not in markup */
1344 if (p == '<')
1345 parser->state = ST_LT;
1346 return 1;
1347 case ST_LT:
1348 /* In tag -- "<" already found */
1349 if (p == '>')
1350 parser->state = ST_NONE;
1351 else if (p == 'a' || p == 'A')
1352 parser->state = ST_LTA;
1353 else if (!isspace((unsigned char)p))
1354 parser->state = ST_TAG;
1355 return 1;
1356 case ST_LTA:
1357 /* In tag -- "<a" already found */
1358 if (p == '>')
1359 parser->state = ST_NONE;
1360 else if (p == '"')
1361 parser->state = ST_TAGAQ;
1362 else if (isspace((unsigned char)p))
1363 parser->state = ST_TAGA;
1364 else
1365 parser->state = ST_TAG;
1366 return 1;
1367 case ST_TAG:
1368 /* In tag, but not "<a" -- disregard */
1369 if (p == '>')
1370 parser->state = ST_NONE;
1371 return 1;
1372 case ST_TAGA:
1373 /* In a-tag -- "<a " already found */
1374 if (p == '>')
1375 parser->state = ST_NONE;
1376 else if (p == '"')
1377 parser->state = ST_TAGAQ;
1378 else if (p == 'h' || p == 'H')
1379 parser->state = ST_H;
1380 else if (!isspace((unsigned char)p))
1381 parser->state = ST_TAGAX;
1382 return 1;
1383 case ST_TAGAX:
1384 /* In unknown keyword in a-tag */
1385 if (p == '>')
1386 parser->state = ST_NONE;
1387 else if (p == '"')
1388 parser->state = ST_TAGAQ;
1389 else if (isspace((unsigned char)p))
1390 parser->state = ST_TAGA;
1391 return 1;
1392 case ST_TAGAQ:
1393 /* In a-tag, unknown argument for keys. */
1394 if (p == '>')
1395 parser->state = ST_NONE;
1396 else if (p == '"')
1397 parser->state = ST_TAGA;
1398 return 1;
1399 case ST_H:
1400 /* In a-tag -- "<a h" already found */
1401 if (p == '>')
1402 parser->state = ST_NONE;
1403 else if (p == '"')
1404 parser->state = ST_TAGAQ;
1405 else if (p == 'r' || p == 'R')
1406 parser->state = ST_R;
1407 else if (isspace((unsigned char)p))
1408 parser->state = ST_TAGA;
1409 else
1410 parser->state = ST_TAGAX;
1411 return 1;
1412 case ST_R:
1413 /* In a-tag -- "<a hr" already found */
1414 if (p == '>')
1415 parser->state = ST_NONE;
1416 else if (p == '"')
1417 parser->state = ST_TAGAQ;
1418 else if (p == 'e' || p == 'E')
1419 parser->state = ST_E;
1420 else if (isspace((unsigned char)p))
1421 parser->state = ST_TAGA;
1422 else
1423 parser->state = ST_TAGAX;
1424 return 1;
1425 case ST_E:
1426 /* In a-tag -- "<a hre" already found */
1427 if (p == '>')
1428 parser->state = ST_NONE;
1429 else if (p == '"')
1430 parser->state = ST_TAGAQ;
1431 else if (p == 'f' || p == 'F')
1432 parser->state = ST_F;
1433 else if (isspace((unsigned char)p))
1434 parser->state = ST_TAGA;
1435 else
1436 parser->state = ST_TAGAX;
1437 return 1;
1438 case ST_F:
1439 /* In a-tag -- "<a href" already found */
1440 if (p == '>')
1441 parser->state = ST_NONE;
1442 else if (p == '"')
1443 parser->state = ST_TAGAQ;
1444 else if (p == '=')
1445 parser->state = ST_HREF;
1446 else if (!isspace((unsigned char)p))
1447 parser->state = ST_TAGAX;
1448 return 1;
1449 case ST_HREF:
1450 /* In a-tag -- "<a href=" already found */
1451 if (p == '>')
1452 parser->state = ST_NONE;
1453 else if (p == '"')
1454 parser->state = ST_HREFQ;
1455 else if (!isspace((unsigned char)p))
1456 parser->state = ST_TAGA;
1457 return 1;
1458 case ST_HREFQ:
1459 /* In href of the a-tag */
1460 end_attr = memchr(buf, '"', len);
1461 if (end_attr == NULL)
1462 return 0;
1463 *end_attr = '\0';
1464 parser->state = ST_TAGA;
1465 if (fetch_add_entry(parser->ue, parser->url, buf, 1))
1466 return -1;
1467 return end_attr + 1 - buf;
1469 /* NOTREACHED */
1470 abort();
1473 struct http_index_cache {
1474 struct http_index_cache *next;
1475 struct url *location;
1476 struct url_list ue;
/* Head of the list of cached listings; fetchListHTTP() prepends to it. */
static struct http_index_cache *index_cache;
1482 * List a directory
1485 fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern, const char *flags)
1487 fetchIO *f;
1488 char buf[2 * PATH_MAX];
1489 size_t buf_len, sum_processed;
1490 ssize_t read_len, processed;
1491 struct index_parser state;
1492 struct http_index_cache *cache = NULL;
1493 int do_cache, ret;
1495 do_cache = CHECK_FLAG('c');
1497 if (do_cache) {
1498 for (cache = index_cache; cache != NULL; cache = cache->next) {
1499 if (strcmp(cache->location->scheme, url->scheme))
1500 continue;
1501 if (strcmp(cache->location->user, url->user))
1502 continue;
1503 if (strcmp(cache->location->pwd, url->pwd))
1504 continue;
1505 if (strcmp(cache->location->host, url->host))
1506 continue;
1507 if (cache->location->port != url->port)
1508 continue;
1509 if (strcmp(cache->location->doc, url->doc))
1510 continue;
1511 return fetchAppendURLList(ue, &cache->ue);
1514 cache = malloc(sizeof(*cache));
1515 fetchInitURLList(&cache->ue);
1516 cache->location = fetchCopyURL(url);
1519 f = fetchGetHTTP(url, flags);
1520 if (f == NULL) {
1521 if (do_cache) {
1522 fetchFreeURLList(&cache->ue);
1523 fetchFreeURL(cache->location);
1524 free(cache);
1526 return -1;
1529 state.url = url;
1530 state.state = ST_NONE;
1531 if (do_cache) {
1532 state.ue = &cache->ue;
1533 } else {
1534 state.ue = ue;
1537 buf_len = 0;
1539 while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {
1540 buf_len += read_len;
1541 sum_processed = 0;
1542 do {
1543 processed = parse_index(&state, buf + sum_processed, buf_len);
1544 if (processed == -1)
1545 break;
1546 buf_len -= processed;
1547 sum_processed += processed;
1548 } while (processed != 0 && buf_len > 0);
1549 if (processed == -1) {
1550 read_len = -1;
1551 break;
1553 memmove(buf, buf + sum_processed, buf_len);
1556 fetchIO_close(f);
1558 ret = read_len < 0 ? -1 : 0;
1560 if (do_cache) {
1561 if (ret == 0) {
1562 cache->next = index_cache;
1563 index_cache = cache;
1566 if (fetchAppendURLList(ue, &cache->ue))
1567 ret = -1;
1570 return ret;