custom message type for VM_QUERY_EXIT
[minix3.git] / lib / libfetch / http.c
blob5ff7da749b1ebd27c810668c21cd7e5a3162d7aa
1 /* $NetBSD: http.c,v 1.29 2010/01/24 19:10:35 joerg Exp $ */
2 /*-
3 * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
4 * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org>
5 * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org>
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer
13 * in this position and unchanged.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $
35 * The following copyright applies to the base64 code:
38 * Copyright 1997 Massachusetts Institute of Technology
40 * Permission to use, copy, modify, and distribute this software and
41 * its documentation for any purpose and without fee is hereby
42 * granted, provided that both the above copyright notice and this
43 * permission notice appear in all copies, that both the above
44 * copyright notice and this permission notice appear in all
45 * supporting documentation, and that the name of M.I.T. not be used
46 * in advertising or publicity pertaining to distribution of the
47 * software without specific, written prior permission. M.I.T. makes
48 * no representations about the suitability of this software for any
49 * purpose. It is provided "as is" without express or implied
50 * warranty.
52 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
53 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
54 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
56 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
59 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
60 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
61 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
62 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
66 #if defined(__linux__) || defined(__MINT__)
67 /* Keep this down to Linux or MiNT, it can create surprises elsewhere. */
68 #define _GNU_SOURCE
69 #endif
71 #ifndef __minix
72 /* Needed for gmtime_r on Interix */
73 #define _REENTRANT
74 #endif
76 #if HAVE_CONFIG_H
77 #include "config.h"
78 #endif
79 #if !defined(NETBSD) && !defined(__minix)
80 #include <nbcompat.h>
81 #endif
83 #include <sys/types.h>
84 #include <sys/socket.h>
86 #include <ctype.h>
87 #include <errno.h>
88 #include <locale.h>
89 #include <stdarg.h>
90 #if !defined(NETBSD) && !defined(__minix)
91 #include <nbcompat/stdio.h>
92 #else
93 #include <stdio.h>
94 #endif
95 #include <stdlib.h>
96 #include <string.h>
97 #include <time.h>
98 #include <unistd.h>
100 #include <netinet/in.h>
101 #include <netinet/tcp.h>
103 #if !defined(NETBSD) && !defined(__minix)
104 #include <nbcompat/netdb.h>
105 #else
106 #include <netdb.h>
107 #endif
109 #include <arpa/inet.h>
111 #include "common.h"
112 #include "httperr.h"
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5

/* Symbolic names for reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NOT_MODIFIED	304
#define HTTP_TEMP_REDIRECT	307
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
/* Not a real status: returned when the status line cannot be parsed */
#define HTTP_PROTOCOL_ERROR	999

/*
 * True if the reply code is one of the redirects we follow.
 * Note: the argument is evaluated several times.
 */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_TEMP_REDIRECT \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True for every client (4xx) and server (5xx) error code.  Both bounds
 * are inclusive so that 400 (Bad Request) and 599 are classified as
 * errors as well.  Note: the argument is evaluated twice.
 */
#define HTTP_ERROR(xyz) ((xyz) >= 400 && (xyz) <= 599)

/* Size of the fixed stack buffers used by the __minix code paths */
#define MINBUFSIZE 4096
139 /*****************************************************************************
140 * I/O functions for decoding chunked streams
143 struct httpio
145 conn_t *conn; /* connection */
146 int chunked; /* chunked mode */
147 int keep_alive; /* keep-alive mode */
148 char *buf; /* chunk buffer */
149 size_t bufsize; /* size of chunk buffer */
150 ssize_t buflen; /* amount of data currently in buffer */
151 int bufpos; /* current read offset in buffer */
152 int eof; /* end-of-file flag */
153 int error; /* error flag */
154 size_t chunksize; /* remaining size of current chunk */
155 off_t contentlength; /* remaining size of the content */
159 * Get next chunk header
161 static int
162 http_new_chunk(struct httpio *io)
164 char *p;
166 if (fetch_getln(io->conn) == -1)
167 return (-1);
169 if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
170 return (-1);
172 for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
173 if (*p == ';')
174 break;
175 if (!isxdigit((unsigned char)*p))
176 return (-1);
177 if (isdigit((unsigned char)*p)) {
178 io->chunksize = io->chunksize * 16 +
179 *p - '0';
180 } else {
181 io->chunksize = io->chunksize * 16 +
182 10 + tolower((unsigned char)*p) - 'a';
186 return (io->chunksize);
190 * Grow the input buffer to at least len bytes
192 static int
193 http_growbuf(struct httpio *io, size_t len)
195 char *tmp;
197 if (io->bufsize >= len)
198 return (0);
200 if ((tmp = realloc(io->buf, len)) == NULL)
201 return (-1);
202 io->buf = tmp;
203 io->bufsize = len;
204 return (0);
208 * Fill the input buffer, do chunk decoding on the fly
210 static int
211 http_fillbuf(struct httpio *io, size_t len)
213 if (io->error)
214 return (-1);
215 if (io->eof)
216 return (0);
218 if (io->contentlength >= 0 && (off_t)len > io->contentlength)
219 len = io->contentlength;
221 if (io->chunked == 0) {
222 if (http_growbuf(io, len) == -1)
223 return (-1);
224 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
225 io->error = 1;
226 return (-1);
228 if (io->contentlength)
229 io->contentlength -= io->buflen;
230 io->bufpos = 0;
231 return (io->buflen);
234 if (io->chunksize == 0) {
235 switch (http_new_chunk(io)) {
236 case -1:
237 io->error = 1;
238 return (-1);
239 case 0:
240 io->eof = 1;
241 if (fetch_getln(io->conn) == -1)
242 return (-1);
243 return (0);
247 if (len > io->chunksize)
248 len = io->chunksize;
249 if (http_growbuf(io, len) == -1)
250 return (-1);
251 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
252 io->error = 1;
253 return (-1);
255 io->chunksize -= io->buflen;
256 if (io->contentlength >= 0)
257 io->contentlength -= io->buflen;
259 if (io->chunksize == 0) {
260 char endl[2];
261 ssize_t len2;
263 len2 = fetch_read(io->conn, endl, 2);
264 if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1)
265 return (-1);
266 if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n')
267 return (-1);
270 io->bufpos = 0;
272 return (io->buflen);
276 * Read function
278 static ssize_t
279 http_readfn(void *v, void *buf, size_t len)
281 struct httpio *io = (struct httpio *)v;
282 size_t l, pos;
284 if (io->error)
285 return (-1);
286 if (io->eof)
287 return (0);
289 for (pos = 0; len > 0; pos += l, len -= l) {
290 /* empty buffer */
291 if (!io->buf || io->bufpos == io->buflen)
292 if (http_fillbuf(io, len) < 1)
293 break;
294 l = io->buflen - io->bufpos;
295 if (len < l)
296 l = len;
297 memcpy((char *)buf + pos, io->buf + io->bufpos, l);
298 io->bufpos += l;
301 if (!pos && io->error)
302 return (-1);
303 return (pos);
307 * Write function
309 static ssize_t
310 http_writefn(void *v, const void *buf, size_t len)
312 struct httpio *io = (struct httpio *)v;
314 return (fetch_write(io->conn, buf, len));
318 * Close function
320 static void
321 http_closefn(void *v)
323 struct httpio *io = (struct httpio *)v;
325 if (io->keep_alive) {
326 int val;
328 val = 0;
329 setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
330 sizeof(val));
331 fetch_cache_put(io->conn, fetch_close);
332 #ifdef TCP_NOPUSH
333 val = 1;
334 setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
335 sizeof(val));
336 #endif
337 } else {
338 fetch_close(io->conn);
341 free(io->buf);
342 free(io);
346 * Wrap a file descriptor up
348 static fetchIO *
349 http_funopen(conn_t *conn, int chunked, int keep_alive, off_t clength)
351 struct httpio *io;
352 fetchIO *f;
354 if ((io = calloc(1, sizeof(*io))) == NULL) {
355 fetch_syserr();
356 return (NULL);
358 io->conn = conn;
359 io->chunked = chunked;
360 io->contentlength = clength;
361 io->keep_alive = keep_alive;
362 f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn);
363 if (f == NULL) {
364 fetch_syserr();
365 free(io);
366 return (NULL);
368 return (f);
/*****************************************************************************
 * Helper functions for talking to the server and parsing its replies
 */

/*
 * Header types
 *
 * Values up to and including hdr_end terminate the header loop in
 * http_request(); everything above it is an ordinary header line.
 */
typedef enum {
	hdr_syserror		= -2,	/* reading the line failed */
	hdr_error		= -1,
	hdr_end			= 0,	/* empty line: end of the headers */
	hdr_unknown		= 1,	/* a header we do not interpret */
	hdr_connection		= 2,
	hdr_content_length	= 3,
	hdr_content_range	= 4,
	hdr_last_modified	= 5,
	hdr_location		= 6,
	hdr_transfer_encoding	= 7,
	hdr_www_authenticate	= 8
} hdr_t;
391 /* Names of interesting headers */
392 static struct {
393 hdr_t num;
394 const char *name;
395 } hdr_names[] = {
396 { hdr_connection, "Connection" },
397 { hdr_content_length, "Content-Length" },
398 { hdr_content_range, "Content-Range" },
399 { hdr_last_modified, "Last-Modified" },
400 { hdr_location, "Location" },
401 { hdr_transfer_encoding, "Transfer-Encoding" },
402 { hdr_www_authenticate, "WWW-Authenticate" },
403 { hdr_unknown, NULL },
407 * Send a formatted line; optionally echo to terminal
409 #ifndef __minix
410 static int
411 http_cmd(conn_t *conn, const char *fmt, ...)
413 va_list ap;
414 size_t len;
415 char *msg;
416 int r;
418 va_start(ap, fmt);
419 len = vasprintf(&msg, fmt, ap);
420 va_end(ap);
422 if (msg == NULL) {
423 errno = ENOMEM;
424 fetch_syserr();
425 return (-1);
428 r = fetch_write(conn, msg, len);
429 free(msg);
431 if (r == -1) {
432 fetch_syserr();
433 return (-1);
436 return (0);
438 #else
439 static int
440 http_cmd(conn_t *conn, const char *fmt, ...)
442 va_list ap;
443 size_t len;
444 char msg[MINBUFSIZE];
445 int r;
447 va_start(ap, fmt);
448 len = vsnprintf(&msg[0], MINBUFSIZE, fmt, ap);
449 va_end(ap);
451 if (len >= MINBUFSIZE) {
452 errno = ENOMEM;
453 fetch_syserr();
454 return (-1);
457 r = fetch_write(conn, &msg[0], len);
459 if (r == -1) {
460 fetch_syserr();
461 return (-1);
464 return (0);
466 #endif
468 * Get and parse status line
470 static int
471 http_get_reply(conn_t *conn)
473 char *p;
475 if (fetch_getln(conn) == -1)
476 return (-1);
478 * A valid status line looks like "HTTP/m.n xyz reason" where m
479 * and n are the major and minor protocol version numbers and xyz
480 * is the reply code.
481 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
482 * just one) that do not send a version number, so we can't rely
483 * on finding one, but if we do, insist on it being 1.0 or 1.1.
484 * We don't care about the reason phrase.
486 if (strncmp(conn->buf, "HTTP", 4) != 0)
487 return (HTTP_PROTOCOL_ERROR);
488 p = conn->buf + 4;
489 if (*p == '/') {
490 if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
491 return (HTTP_PROTOCOL_ERROR);
492 p += 4;
494 if (*p != ' ' ||
495 !isdigit((unsigned char)p[1]) ||
496 !isdigit((unsigned char)p[2]) ||
497 !isdigit((unsigned char)p[3]))
498 return (HTTP_PROTOCOL_ERROR);
500 conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
501 return (conn->err);
/*
 * Check a header; if the type matches the given string, return a pointer
 * to the beginning of the value.
 *
 * str is the header name to look for and hdr the complete header line.
 * The name is compared case-insensitively and must be followed directly
 * by a colon.  Returns a pointer into hdr just past the colon and any
 * whitespace following it, or NULL if the line is a different header.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Advance only past characters that really matched.  Stepping the
	 * pointers inside the comparison would also skip the first
	 * mismatching pair, so a name differing only in its last character
	 * would be accepted.
	 */
	while (*str && *hdr &&
	    tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
		++str;
		++hdr;
	}
	if (*str || *hdr != ':')
		return (NULL);
	/* skip the colon and any whitespace in front of the value */
	while (*hdr && isspace((unsigned char)*++hdr))
		/* nothing */;
	return (hdr);
}
522 * Get the next header and return the appropriate symbolic code.
524 static hdr_t
525 http_next_header(conn_t *conn, const char **p)
527 int i;
529 if (fetch_getln(conn) == -1)
530 return (hdr_syserror);
531 while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1]))
532 conn->buflen--;
533 conn->buf[conn->buflen] = '\0';
534 if (conn->buflen == 0)
535 return (hdr_end);
537 * We could check for malformed headers but we don't really care.
538 * A valid header starts with a token immediately followed by a
539 * colon; a token is any sequence of non-control, non-whitespace
540 * characters except "()<>@,;:\\\"{}".
542 for (i = 0; hdr_names[i].num != hdr_unknown; i++)
543 if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL)
544 return (hdr_names[i].num);
545 return (hdr_unknown);
/*
 * Parse a last-modified header
 *
 * Accepts a date of the form "Sun, 06 Nov 1994 08:49:37 GMT" and stores
 * it in *mtime as seconds since the epoch.  LC_TIME is switched to "C"
 * while parsing, so day and month names are matched in English, and is
 * restored afterwards.  Returns 0 on success and -1 if the string cannot
 * be parsed; *mtime is left untouched on failure.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	char locale[64], *r;
	const char *cur;
	struct tm tm;

	/* strptime() only fills in the fields it actually parses */
	memset(&tm, 0, sizeof(tm));

	cur = setlocale(LC_TIME, NULL);
	if (cur == NULL)
		cur = "C";
	/* strncpy() does not terminate the copy when the source is too long */
	strncpy(locale, cur, sizeof(locale) - 1);
	locale[sizeof(locale) - 1] = '\0';

	setlocale(LC_TIME, "C");
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/* XXX should add support for date-2 and date-3 */
	setlocale(LC_TIME, locale);
	if (r == NULL)
		return (-1);
	*mtime = timegm(&tm);
	return (0);
}
/*
 * Parse a content-length header
 *
 * p must consist of decimal digits only.  Stores the value in *length
 * and returns 0, or returns -1 (leaving *length untouched) if any other
 * character is present.  An empty string yields a length of 0.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	off_t total = 0;

	while (isdigit((unsigned char)*p)) {
		total = total * 10 + (*p - '0');
		p++;
	}
	/* anything left over means the value was not purely numeric */
	if (*p != '\0')
		return (-1);
	*length = total;
	return (0);
}
/*
 * Parse a content-range header
 *
 * Accepts "bytes first-last/size" and the unsatisfied-range form
 * "bytes * /size" (written without the blank).  On success returns 0 and
 * stores:
 *   *offset  first byte position, or -1 for the "*" form
 *   *length  number of bytes in the range, or 0 for the "*" form
 *   *size    total size of the resource
 * Returns -1 without touching the outputs if the value is malformed, the
 * range is reversed, or the range does not fit inside the stated size.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (*p == '*') {
		first = last = -1;
		++p;
	} else {
		for (first = 0; *p && isdigit((unsigned char)*p); ++p)
			first = first * 10 + *p - '0';
		if (*p != '-')
			return (-1);
		for (last = 0, ++p; *p && isdigit((unsigned char)*p); ++p)
			last = last * 10 + *p - '0';
	}
	if (first > last || *p != '/')
		return (-1);
	for (len = 0, ++p; *p && isdigit((unsigned char)*p); ++p)
		len = len * 10 + *p - '0';
	if (*p)
		return (-1);
	/*
	 * The "range fits into the resource" test only makes sense for a
	 * real range.  With the "*" form first and last are both -1, and
	 * applying it there would reject a zero-sized resource.
	 */
	if (first != -1 && len < last - first + 1)
		return (-1);
	if (first == -1)
		*length = 0;
	else
		*length = last - first + 1;
	*offset = first;
	*size = len;
	return (0);
}
/*****************************************************************************
 * Helper functions for authorization
 */

/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src and returns the result as a
 * newly allocated NUL-terminated string that the caller must free(), or
 * NULL if the allocation fails.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/*
	 * Work on unsigned bytes: with a signed char type, bytes >= 0x80
	 * would be sign-extended and spill set bits into the neighbouring
	 * 6-bit groups.
	 */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	/* four output characters per (started) group of three, plus NUL */
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	while (l >= 3) {
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) |
		    in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3; l -= 3;
		dst += 4;
	}

	/* encode the one or two left-over bytes and pad with '=' */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	case 0:
		break;
	}

	*dst = 0;
	return (str);
}
683 * Encode username and password
685 #ifndef __minix
686 static int
687 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
689 char *upw, *auth;
690 int r;
692 if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
693 return (-1);
694 auth = http_base64(upw);
695 free(upw);
696 if (auth == NULL)
697 return (-1);
698 r = http_cmd(conn, "%s: Basic %s\r\n", hdr, auth);
699 free(auth);
700 return (r);
702 #else
703 static int
704 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
706 char upw[MINBUFSIZE], *auth;
707 int len, r;
709 len = snprintf(&upw[0], MINBUFSIZE, "%s:%s", usr, pwd);
710 if (len >= MINBUFSIZE)
711 return -1;
712 auth = http_base64(&upw[0]);
713 if (auth == NULL)
714 return (-1);
715 r = http_cmd(conn, "%s: Basic %s\r\n", hdr, auth);
716 free(auth);
717 return (r);
719 #endif
721 * Send an authorization header
723 static int
724 http_authorize(conn_t *conn, const char *hdr, const char *p)
726 /* basic authorization */
727 if (strncasecmp(p, "basic:", 6) == 0) {
728 char *user, *pwd, *str;
729 int r;
731 /* skip realm */
732 for (p += 6; *p && *p != ':'; ++p)
733 /* nothing */ ;
734 if (!*p || strchr(++p, ':') == NULL)
735 return (-1);
736 if ((str = strdup(p)) == NULL)
737 return (-1); /* XXX */
738 user = str;
739 pwd = strchr(str, ':');
740 *pwd++ = '\0';
741 r = http_basic_auth(conn, hdr, user, pwd);
742 free(str);
743 return (r);
745 return (-1);
749 /*****************************************************************************
750 * Helper functions for connecting to a server or proxy
754 * Connect to the correct HTTP server or proxy.
756 static conn_t *
757 http_connect(struct url *URL, struct url *purl, const char *flags, int *cached)
759 conn_t *conn;
760 int af, verbose;
761 #ifdef TCP_NOPUSH
762 int val;
763 #endif
765 *cached = 1;
767 #ifdef INET6
768 af = AF_UNSPEC;
769 #else
770 af = AF_INET;
771 #endif
773 verbose = CHECK_FLAG('v');
774 if (CHECK_FLAG('4'))
775 af = AF_INET;
776 #ifdef INET6
777 else if (CHECK_FLAG('6'))
778 af = AF_INET6;
779 #endif
781 if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
782 URL = purl;
783 } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
784 /* can't talk http to an ftp server */
785 /* XXX should set an error code */
786 return (NULL);
789 if ((conn = fetch_cache_get(URL, af)) != NULL) {
790 *cached = 1;
791 return (conn);
794 if ((conn = fetch_connect(URL, af, verbose)) == NULL)
795 /* fetch_connect() has already set an error code */
796 return (NULL);
797 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
798 fetch_ssl(conn, verbose) == -1) {
799 fetch_close(conn);
800 /* grrr */
801 #ifdef EAUTH
802 errno = EAUTH;
803 #else
804 errno = EPERM;
805 #endif
806 fetch_syserr();
807 return (NULL);
810 #ifdef TCP_NOPUSH
811 val = 1;
812 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
813 #endif
815 return (conn);
818 static struct url *
819 http_get_proxy(struct url * url, const char *flags)
821 struct url *purl;
822 char *p;
824 if (flags != NULL && strchr(flags, 'd') != NULL)
825 return (NULL);
826 if (fetch_no_proxy_match(url->host))
827 return (NULL);
828 if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
829 *p && (purl = fetchParseURL(p))) {
830 if (!*purl->scheme)
831 strcpy(purl->scheme, SCHEME_HTTP);
832 if (!purl->port)
833 purl->port = fetch_default_proxy_port(purl->scheme);
834 if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
835 return (purl);
836 fetchFreeURL(purl);
838 return (NULL);
841 static void
842 set_if_modified_since(conn_t *conn, time_t last_modified)
844 static const char weekdays[] = "SunMonTueWedThuFriSat";
845 static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
846 struct tm tm;
847 char buf[80];
848 gmtime_r(&last_modified, &tm);
849 snprintf(buf, sizeof(buf), "%.3s, %02d %.3s %4d %02d:%02d:%02d GMT",
850 weekdays + tm.tm_wday * 3, tm.tm_mday, months + tm.tm_mon * 3,
851 tm.tm_year + 1900, tm.tm_hour, tm.tm_min, tm.tm_sec);
852 http_cmd(conn, "If-Modified-Since: %s\r\n", buf);
856 /*****************************************************************************
857 * Core
861 * Send a request and process the reply
863 * XXX This function is way too long, the do..while loop should be split
864 * XXX off into a separate function.
866 fetchIO *
867 http_request(struct url *URL, const char *op, struct url_stat *us,
868 struct url *purl, const char *flags)
870 conn_t *conn;
871 struct url *url, *new;
872 int chunked, direct, if_modified_since, need_auth, noredirect;
873 int keep_alive, verbose, cached;
874 int e, i, n, val;
875 off_t offset, clength, length, size;
876 time_t mtime;
877 const char *p;
878 fetchIO *f;
879 hdr_t h;
880 char hbuf[URL_HOSTLEN + 7], *host;
882 direct = CHECK_FLAG('d');
883 noredirect = CHECK_FLAG('A');
884 verbose = CHECK_FLAG('v');
885 if_modified_since = CHECK_FLAG('i');
886 keep_alive = 0;
888 if (direct && purl) {
889 fetchFreeURL(purl);
890 purl = NULL;
893 /* try the provided URL first */
894 url = URL;
896 /* if the A flag is set, we only get one try */
897 n = noredirect ? 1 : MAX_REDIRECT;
898 i = 0;
900 e = HTTP_PROTOCOL_ERROR;
901 need_auth = 0;
902 do {
903 new = NULL;
904 chunked = 0;
905 offset = 0;
906 clength = -1;
907 length = -1;
908 size = -1;
909 mtime = 0;
911 /* check port */
912 if (!url->port)
913 url->port = fetch_default_port(url->scheme);
915 /* were we redirected to an FTP URL? */
916 if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
917 if (strcmp(op, "GET") == 0)
918 return (ftp_request(url, "RETR", NULL, us, purl, flags));
919 else if (strcmp(op, "HEAD") == 0)
920 return (ftp_request(url, "STAT", NULL, us, purl, flags));
923 /* connect to server or proxy */
924 if ((conn = http_connect(url, purl, flags, &cached)) == NULL)
925 goto ouch;
927 host = url->host;
928 #ifdef INET6
929 if (strchr(url->host, ':')) {
930 snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
931 host = hbuf;
933 #endif
934 if (url->port != fetch_default_port(url->scheme)) {
935 if (host != hbuf) {
936 strcpy(hbuf, host);
937 host = hbuf;
939 snprintf(hbuf + strlen(hbuf),
940 sizeof(hbuf) - strlen(hbuf), ":%d", url->port);
943 /* send request */
944 if (verbose)
945 fetch_info("requesting %s://%s%s",
946 url->scheme, host, url->doc);
947 if (purl) {
948 http_cmd(conn, "%s %s://%s%s HTTP/1.1\r\n",
949 op, url->scheme, host, url->doc);
950 } else {
951 http_cmd(conn, "%s %s HTTP/1.1\r\n",
952 op, url->doc);
955 if (if_modified_since && url->last_modified > 0)
956 set_if_modified_since(conn, url->last_modified);
958 /* virtual host */
959 http_cmd(conn, "Host: %s\r\n", host);
961 /* proxy authorization */
962 if (purl) {
963 if (*purl->user || *purl->pwd)
964 http_basic_auth(conn, "Proxy-Authorization",
965 purl->user, purl->pwd);
966 else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
967 http_authorize(conn, "Proxy-Authorization", p);
970 /* server authorization */
971 if (need_auth || *url->user || *url->pwd) {
972 if (*url->user || *url->pwd)
973 http_basic_auth(conn, "Authorization", url->user, url->pwd);
974 else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
975 http_authorize(conn, "Authorization", p);
976 else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
977 http_basic_auth(conn, "Authorization", url->user, url->pwd);
978 } else {
979 http_seterr(HTTP_NEED_AUTH);
980 goto ouch;
984 /* other headers */
985 if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
986 if (strcasecmp(p, "auto") == 0)
987 http_cmd(conn, "Referer: %s://%s%s\r\n",
988 url->scheme, host, url->doc);
989 else
990 http_cmd(conn, "Referer: %s\r\n", p);
992 if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
993 http_cmd(conn, "User-Agent: %s\r\n", p);
994 else
995 http_cmd(conn, "User-Agent: %s\r\n", _LIBFETCH_VER);
996 #ifndef __minix
997 if (url->offset > 0)
998 http_cmd(conn, "Range: bytes=%lld-\r\n", (long long)url->offset);
999 #else
1000 if (url->offset > 0)
1001 http_cmd(conn, "Range: bytes=%ld-\r\n", (long)url->offset);
1002 #endif
1003 http_cmd(conn, "\r\n");
1006 * Force the queued request to be dispatched. Normally, one
1007 * would do this with shutdown(2) but squid proxies can be
1008 * configured to disallow such half-closed connections. To
1009 * be compatible with such configurations, fiddle with socket
1010 * options to force the pending data to be written.
1012 #ifdef TCP_NOPUSH
1013 val = 0;
1014 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
1015 sizeof(val));
1016 #endif
1017 val = 1;
1018 setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
1019 sizeof(val));
1021 /* get reply */
1022 switch (http_get_reply(conn)) {
1023 case HTTP_OK:
1024 case HTTP_PARTIAL:
1025 case HTTP_NOT_MODIFIED:
1026 /* fine */
1027 break;
1028 case HTTP_MOVED_PERM:
1029 case HTTP_MOVED_TEMP:
1030 case HTTP_SEE_OTHER:
1032 * Not so fine, but we still have to read the
1033 * headers to get the new location.
1035 break;
1036 case HTTP_NEED_AUTH:
1037 if (need_auth) {
1039 * We already sent out authorization code,
1040 * so there's nothing more we can do.
1042 http_seterr(conn->err);
1043 goto ouch;
1045 /* try again, but send the password this time */
1046 if (verbose)
1047 fetch_info("server requires authorization");
1048 break;
1049 case HTTP_NEED_PROXY_AUTH:
1051 * If we're talking to a proxy, we already sent
1052 * our proxy authorization code, so there's
1053 * nothing more we can do.
1055 http_seterr(conn->err);
1056 goto ouch;
1057 case HTTP_BAD_RANGE:
1059 * This can happen if we ask for 0 bytes because
1060 * we already have the whole file. Consider this
1061 * a success for now, and check sizes later.
1063 break;
1064 case HTTP_PROTOCOL_ERROR:
1065 /* fall through */
1066 case -1:
1067 --i;
1068 if (cached)
1069 continue;
1070 fetch_syserr();
1071 goto ouch;
1072 default:
1073 http_seterr(conn->err);
1074 if (!verbose)
1075 goto ouch;
1076 /* fall through so we can get the full error message */
1079 /* get headers */
1080 do {
1081 switch ((h = http_next_header(conn, &p))) {
1082 case hdr_syserror:
1083 fetch_syserr();
1084 goto ouch;
1085 case hdr_error:
1086 http_seterr(HTTP_PROTOCOL_ERROR);
1087 goto ouch;
1088 case hdr_connection:
1089 /* XXX too weak? */
1090 keep_alive = (strcasecmp(p, "keep-alive") == 0);
1091 break;
1092 case hdr_content_length:
1093 http_parse_length(p, &clength);
1094 break;
1095 case hdr_content_range:
1096 http_parse_range(p, &offset, &length, &size);
1097 break;
1098 case hdr_last_modified:
1099 http_parse_mtime(p, &mtime);
1100 break;
1101 case hdr_location:
1102 if (!HTTP_REDIRECT(conn->err))
1103 break;
1104 if (new)
1105 free(new);
1106 if (verbose)
1107 fetch_info("%d redirect to %s", conn->err, p);
1108 if (*p == '/')
1109 /* absolute path */
1110 new = fetchMakeURL(url->scheme, url->host, url->port, p,
1111 url->user, url->pwd);
1112 else
1113 new = fetchParseURL(p);
1114 if (new == NULL) {
1115 /* XXX should set an error code */
1116 goto ouch;
1118 if (!*new->user && !*new->pwd) {
1119 strcpy(new->user, url->user);
1120 strcpy(new->pwd, url->pwd);
1122 new->offset = url->offset;
1123 new->length = url->length;
1124 break;
1125 case hdr_transfer_encoding:
1126 /* XXX weak test*/
1127 chunked = (strcasecmp(p, "chunked") == 0);
1128 break;
1129 case hdr_www_authenticate:
1130 if (conn->err != HTTP_NEED_AUTH)
1131 break;
1132 /* if we were smarter, we'd check the method and realm */
1133 break;
1134 case hdr_end:
1135 /* fall through */
1136 case hdr_unknown:
1137 /* ignore */
1138 break;
1140 } while (h > hdr_end);
1142 /* we need to provide authentication */
1143 if (conn->err == HTTP_NEED_AUTH) {
1144 e = conn->err;
1145 need_auth = 1;
1146 fetch_close(conn);
1147 conn = NULL;
1148 continue;
1151 /* requested range not satisfiable */
1152 if (conn->err == HTTP_BAD_RANGE) {
1153 if (url->offset == size && url->length == 0) {
1154 /* asked for 0 bytes; fake it */
1155 offset = url->offset;
1156 conn->err = HTTP_OK;
1157 break;
1158 } else {
1159 http_seterr(conn->err);
1160 goto ouch;
1164 /* we have a hit or an error */
1165 if (conn->err == HTTP_OK ||
1166 conn->err == HTTP_PARTIAL ||
1167 conn->err == HTTP_NOT_MODIFIED ||
1168 HTTP_ERROR(conn->err))
1169 break;
1171 /* all other cases: we got a redirect */
1172 e = conn->err;
1173 need_auth = 0;
1174 fetch_close(conn);
1175 conn = NULL;
1176 if (!new)
1177 break;
1178 if (url != URL)
1179 fetchFreeURL(url);
1180 url = new;
1181 } while (++i < n);
1183 /* we failed, or ran out of retries */
1184 if (conn == NULL) {
1185 http_seterr(e);
1186 goto ouch;
1189 /* check for inconsistencies */
1190 if (clength != -1 && length != -1 && clength != length) {
1191 http_seterr(HTTP_PROTOCOL_ERROR);
1192 goto ouch;
1194 if (clength == -1)
1195 clength = length;
1196 if (clength != -1)
1197 length = offset + clength;
1198 if (length != -1 && size != -1 && length != size) {
1199 http_seterr(HTTP_PROTOCOL_ERROR);
1200 goto ouch;
1202 if (size == -1)
1203 size = length;
1205 /* fill in stats */
1206 if (us) {
1207 us->size = size;
1208 us->atime = us->mtime = mtime;
1211 /* too far? */
1212 if (URL->offset > 0 && offset > URL->offset) {
1213 http_seterr(HTTP_PROTOCOL_ERROR);
1214 goto ouch;
1217 /* report back real offset and size */
1218 URL->offset = offset;
1219 URL->length = clength;
1221 if (clength == -1 && !chunked)
1222 keep_alive = 0;
1224 if (conn->err == HTTP_NOT_MODIFIED) {
1225 http_seterr(HTTP_NOT_MODIFIED);
1226 if (keep_alive) {
1227 fetch_cache_put(conn, fetch_close);
1228 conn = NULL;
1230 goto ouch;
1233 /* wrap it up in a fetchIO */
1234 if ((f = http_funopen(conn, chunked, keep_alive, clength)) == NULL) {
1235 fetch_syserr();
1236 goto ouch;
1239 if (url != URL)
1240 fetchFreeURL(url);
1241 if (purl)
1242 fetchFreeURL(purl);
1244 if (HTTP_ERROR(conn->err)) {
1246 if (keep_alive) {
1247 char buf[512];
1248 do {
1249 } while (fetchIO_read(f, buf, sizeof(buf)) > 0);
1252 fetchIO_close(f);
1253 f = NULL;
1256 return (f);
1258 ouch:
1259 if (url != URL)
1260 fetchFreeURL(url);
1261 if (purl)
1262 fetchFreeURL(purl);
1263 if (conn != NULL)
1264 fetch_close(conn);
1265 return (NULL);
1269 /*****************************************************************************
1270 * Entry points
1274 * Retrieve and stat a file by HTTP
1276 fetchIO *
1277 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
1279 return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags));
1283 * Retrieve a file by HTTP
1285 fetchIO *
1286 fetchGetHTTP(struct url *URL, const char *flags)
1288 return (fetchXGetHTTP(URL, NULL, flags));
1292 * Store a file by HTTP
1294 fetchIO *
1295 fetchPutHTTP(struct url *URL, const char *flags)
1297 fprintf(stderr, "fetchPutHTTP(): not implemented\n");
1298 return (NULL);
1302 * Get an HTTP document's metadata
1305 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
1307 fetchIO *f;
1309 f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags);
1310 if (f == NULL)
1311 return (-1);
1312 fetchIO_close(f);
1313 return (0);
/*
 * States of the HTML scanner in parse_index().
 */
enum http_states {
	ST_NONE = 0,	/* plain text, not in markup */
	ST_LT,		/* in tag -- "<" already found */
	ST_LTA,		/* in tag -- "<a" already found */
	ST_TAGA,	/* in a-tag -- "<a " already found */
	ST_H,		/* in a-tag -- "<a h" already found */
	ST_R,		/* presumably "<a hr" found -- not visible here, verify */
	ST_E,		/* presumably "<a hre" found -- verify */
	ST_F,		/* presumably "<a href" found -- verify */
	ST_HREF,	/* presumably inside an href value -- verify */
	ST_HREFQ,	/* presumably inside a quoted href value -- verify */
	ST_TAG,		/* in tag, but not "<a" -- disregard */
	ST_TAGAX,	/* in unknown keyword in a-tag */
	ST_TAGAQ	/* in a-tag, inside a quoted argument */
};
1332 struct index_parser {
1333 struct url_list *ue;
1334 struct url *url;
1335 enum http_states state;
1338 static ssize_t
1339 parse_index(struct index_parser *parser, const char *buf, size_t len)
1341 char *end_attr, p = *buf;
1343 switch (parser->state) {
1344 case ST_NONE:
1345 /* Plain text, not in markup */
1346 if (p == '<')
1347 parser->state = ST_LT;
1348 return 1;
1349 case ST_LT:
1350 /* In tag -- "<" already found */
1351 if (p == '>')
1352 parser->state = ST_NONE;
1353 else if (p == 'a' || p == 'A')
1354 parser->state = ST_LTA;
1355 else if (!isspace((unsigned char)p))
1356 parser->state = ST_TAG;
1357 return 1;
1358 case ST_LTA:
1359 /* In tag -- "<a" already found */
1360 if (p == '>')
1361 parser->state = ST_NONE;
1362 else if (p == '"')
1363 parser->state = ST_TAGAQ;
1364 else if (isspace((unsigned char)p))
1365 parser->state = ST_TAGA;
1366 else
1367 parser->state = ST_TAG;
1368 return 1;
1369 case ST_TAG:
1370 /* In tag, but not "<a" -- disregard */
1371 if (p == '>')
1372 parser->state = ST_NONE;
1373 return 1;
1374 case ST_TAGA:
1375 /* In a-tag -- "<a " already found */
1376 if (p == '>')
1377 parser->state = ST_NONE;
1378 else if (p == '"')
1379 parser->state = ST_TAGAQ;
1380 else if (p == 'h' || p == 'H')
1381 parser->state = ST_H;
1382 else if (!isspace((unsigned char)p))
1383 parser->state = ST_TAGAX;
1384 return 1;
1385 case ST_TAGAX:
1386 /* In unknown keyword in a-tag */
1387 if (p == '>')
1388 parser->state = ST_NONE;
1389 else if (p == '"')
1390 parser->state = ST_TAGAQ;
1391 else if (isspace((unsigned char)p))
1392 parser->state = ST_TAGA;
1393 return 1;
1394 case ST_TAGAQ:
1395 /* In a-tag, unknown argument for keys. */
1396 if (p == '>')
1397 parser->state = ST_NONE;
1398 else if (p == '"')
1399 parser->state = ST_TAGA;
1400 return 1;
1401 case ST_H:
1402 /* In a-tag -- "<a h" already found */
1403 if (p == '>')
1404 parser->state = ST_NONE;
1405 else if (p == '"')
1406 parser->state = ST_TAGAQ;
1407 else if (p == 'r' || p == 'R')
1408 parser->state = ST_R;
1409 else if (isspace((unsigned char)p))
1410 parser->state = ST_TAGA;
1411 else
1412 parser->state = ST_TAGAX;
1413 return 1;
1414 case ST_R:
1415 /* In a-tag -- "<a hr" already found */
1416 if (p == '>')
1417 parser->state = ST_NONE;
1418 else if (p == '"')
1419 parser->state = ST_TAGAQ;
1420 else if (p == 'e' || p == 'E')
1421 parser->state = ST_E;
1422 else if (isspace((unsigned char)p))
1423 parser->state = ST_TAGA;
1424 else
1425 parser->state = ST_TAGAX;
1426 return 1;
1427 case ST_E:
1428 /* In a-tag -- "<a hre" already found */
1429 if (p == '>')
1430 parser->state = ST_NONE;
1431 else if (p == '"')
1432 parser->state = ST_TAGAQ;
1433 else if (p == 'f' || p == 'F')
1434 parser->state = ST_F;
1435 else if (isspace((unsigned char)p))
1436 parser->state = ST_TAGA;
1437 else
1438 parser->state = ST_TAGAX;
1439 return 1;
1440 case ST_F:
1441 /* In a-tag -- "<a href" already found */
1442 if (p == '>')
1443 parser->state = ST_NONE;
1444 else if (p == '"')
1445 parser->state = ST_TAGAQ;
1446 else if (p == '=')
1447 parser->state = ST_HREF;
1448 else if (!isspace((unsigned char)p))
1449 parser->state = ST_TAGAX;
1450 return 1;
1451 case ST_HREF:
1452 /* In a-tag -- "<a href=" already found */
1453 if (p == '>')
1454 parser->state = ST_NONE;
1455 else if (p == '"')
1456 parser->state = ST_HREFQ;
1457 else if (!isspace((unsigned char)p))
1458 parser->state = ST_TAGA;
1459 return 1;
1460 case ST_HREFQ:
1461 /* In href of the a-tag */
1462 end_attr = memchr(buf, '"', len);
1463 if (end_attr == NULL)
1464 return 0;
1465 *end_attr = '\0';
1466 parser->state = ST_TAGA;
1467 if (fetch_add_entry(parser->ue, parser->url, buf, 1))
1468 return -1;
1469 return end_attr + 1 - buf;
1471 /* NOTREACHED */
1472 abort();
1475 struct http_index_cache {
1476 struct http_index_cache *next;
1477 struct url *location;
1478 struct url_list ue;
1481 static struct http_index_cache *index_cache;
1484 * List a directory
1487 fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern, const char *flags)
1489 fetchIO *f;
1490 char buf[2 * PATH_MAX];
1491 size_t buf_len, sum_processed;
1492 ssize_t read_len, processed;
1493 struct index_parser state;
1494 struct http_index_cache *cache = NULL;
1495 int do_cache, ret;
1497 do_cache = CHECK_FLAG('c');
1499 if (do_cache) {
1500 for (cache = index_cache; cache != NULL; cache = cache->next) {
1501 if (strcmp(cache->location->scheme, url->scheme))
1502 continue;
1503 if (strcmp(cache->location->user, url->user))
1504 continue;
1505 if (strcmp(cache->location->pwd, url->pwd))
1506 continue;
1507 if (strcmp(cache->location->host, url->host))
1508 continue;
1509 if (cache->location->port != url->port)
1510 continue;
1511 if (strcmp(cache->location->doc, url->doc))
1512 continue;
1513 return fetchAppendURLList(ue, &cache->ue);
1516 cache = malloc(sizeof(*cache));
1517 fetchInitURLList(&cache->ue);
1518 cache->location = fetchCopyURL(url);
1521 f = fetchGetHTTP(url, flags);
1522 if (f == NULL) {
1523 if (do_cache) {
1524 fetchFreeURLList(&cache->ue);
1525 fetchFreeURL(cache->location);
1526 free(cache);
1528 return -1;
1531 state.url = url;
1532 state.state = ST_NONE;
1533 if (do_cache) {
1534 state.ue = &cache->ue;
1535 } else {
1536 state.ue = ue;
1539 buf_len = 0;
1541 while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {
1542 buf_len += read_len;
1543 sum_processed = 0;
1544 do {
1545 processed = parse_index(&state, buf + sum_processed, buf_len);
1546 if (processed == -1)
1547 break;
1548 buf_len -= processed;
1549 sum_processed += processed;
1550 } while (processed != 0 && buf_len > 0);
1551 if (processed == -1) {
1552 read_len = -1;
1553 break;
1555 memmove(buf, buf + sum_processed, buf_len);
1558 fetchIO_close(f);
1560 ret = read_len < 0 ? -1 : 0;
1562 if (do_cache) {
1563 if (ret == 0) {
1564 cache->next = index_cache;
1565 index_cache = cache;
1568 if (fetchAppendURLList(ue, &cache->ue))
1569 ret = -1;
1572 return ret;