Call decompress_data only for compressed data.
[elinks/elinks-j605.git] / src / protocol / http / http.c
blob30965311c18bbee220b998e613b5e5ece79e0705
1 /* Internal "http" protocol implementation */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #ifdef HAVE_LIMITS_H
12 #include <limits.h>
13 #endif
15 #include "elinks.h"
17 #include "cache/cache.h"
18 #include "config/options.h"
19 #include "cookies/cookies.h"
20 #include "intl/charsets.h"
21 #include "intl/gettext/libintl.h"
22 #include "main/module.h"
23 #include "network/connection.h"
24 #include "network/progress.h"
25 #include "network/socket.h"
26 #include "osdep/ascii.h"
27 #include "osdep/osdep.h"
28 #include "osdep/sysname.h"
29 #include "protocol/auth/auth.h"
30 #include "protocol/auth/digest.h"
31 #include "protocol/date.h"
32 #include "protocol/header.h"
33 #include "protocol/http/blacklist.h"
34 #include "protocol/http/codes.h"
35 #include "protocol/http/http.h"
36 #include "protocol/uri.h"
37 #include "session/session.h"
38 #include "terminal/terminal.h"
39 #include "util/base64.h"
40 #include "util/conv.h"
41 #include "util/memory.h"
42 #include "util/string.h"
44 #ifdef CONFIG_GSSAPI
45 #include "http_negotiate.h"
46 #endif
/* Predicates over a struct http_version value (passed by value, not by
 * pointer); each one only compares the .major and .minor fields. */
48 /* These macros concern the struct http_version defined in the http.h */
49 #define HTTP_0_9(x) ((x).major == 0 && (x).minor == 9)
50 #define HTTP_1_0(x) ((x).major == 1 && (x).minor == 0)
51 #define HTTP_1_1(x) ((x).major == 1 && (x).minor == 1)
52 #define PRE_HTTP_1_0(x) ((x).major < 1)
53 #define PRE_HTTP_1_1(x) (PRE_HTTP_1_0(x) || HTTP_1_0(x))
54 #define POST_HTTP_1_0(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 0))
55 #define POST_HTTP_1_1(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 1))
/* Special values of http->length; decompress_data() compares
 * http->length against LEN_CHUNKED to detect chunked transfers. */
58 #define LEN_CHUNKED -2 /* == we get data in unknown number of chunks */
59 #define LEN_FINISHED 0
/* Negative values of http->chunk_remaining are parser states rather
 * than byte counts. */
61 /* Either bytes coming in this chunk yet or "parser state". */
62 #define CHUNK_DATA_END -3
63 #define CHUNK_ZERO_SIZE -2
64 #define CHUNK_SIZE -1
/* Proxy authentication state; http_send_header() fills in user and
 * password from the options when proxy_auth.digest is set. */
66 static struct auth_entry proxy_auth;
/* Accept-Charset header line, built on first use by
 * init_accept_charset() and released in done_http(). */
68 static unsigned char *accept_charset = NULL;
/* Option table of the HTTP module: the "protocol.http" tree (server bug
 * workarounds, proxy, referer, language, compression, TRACE debugging
 * and User-Agent) followed by the "protocol.https" proxy tree.  The
 * table is terminated by NULL_OPTION_INFO and is referenced from
 * http_protocol_module. */
71 static union option_info http_options[] = {
72 INIT_OPT_TREE("protocol", N_("HTTP"),
73 "http", 0,
74 N_("HTTP-specific options.")),
77 INIT_OPT_TREE("protocol.http", N_("Server bug workarounds"),
78 "bugs", 0,
79 N_("Server-side HTTP bugs workarounds.")),
/* Note the inverted sense: http_send_header() adds the Accept-Charset
 * header only when this option is false. */
81 INIT_OPT_BOOL("protocol.http.bugs", N_("Do not send Accept-Charset"),
82 "accept_charset", 0, 1,
83 N_("The Accept-Charset header is quite long and sending it "
84 "can trigger bugs in some rarely found servers.")),
86 INIT_OPT_BOOL("protocol.http.bugs", N_("Allow blacklisting"),
87 "allow_blacklist", 0, 1,
88 N_("Allow blacklisting of buggy servers.")),
90 INIT_OPT_BOOL("protocol.http.bugs", N_("Broken 302 redirects"),
91 "broken_302_redirect", 0, 1,
92 N_("Broken 302 redirect (violates RFC but compatible with "
93 "Netscape). This is a problem for a lot of web discussion "
94 "boards and the like. If they will do strange things to you, "
95 "try to play with this.")),
97 INIT_OPT_BOOL("protocol.http.bugs", N_("No keepalive after POST requests"),
98 "post_no_keepalive", 0, 0,
99 N_("Disable keepalive connection after POST request.")),
101 INIT_OPT_BOOL("protocol.http.bugs", N_("Use HTTP/1.0"),
102 "http10", 0, 0,
103 N_("Use HTTP/1.0 protocol instead of HTTP/1.1.")),
105 INIT_OPT_TREE("protocol.http", N_("Proxy configuration"),
106 "proxy", 0,
107 N_("HTTP proxy configuration.")),
109 INIT_OPT_STRING("protocol.http.proxy", N_("Host and port-number"),
110 "host", 0, "",
111 N_("Host and port-number (host:port) of the HTTP proxy, "
112 "or blank. If it's blank, HTTP_PROXY environment variable "
113 "is checked as well.")),
115 INIT_OPT_STRING("protocol.http.proxy", N_("Username"),
116 "user", 0, "",
117 N_("Proxy authentication username.")),
119 INIT_OPT_STRING("protocol.http.proxy", N_("Password"),
120 "passwd", 0, "",
121 N_("Proxy authentication password.")),
/* NOTE(review): the concatenated help text below reads
 * "...by the browser.This way, ..." because the literal ending in
 * "browser." has no trailing space.  Fixing it changes the gettext
 * msgid, so it is left for a change that also updates translations. */
124 INIT_OPT_TREE("protocol.http", N_("Referer sending"),
125 "referer", 0,
126 N_("HTTP referer sending options. HTTP referer is a special "
127 "header sent in the HTTP requests, which is supposed to "
128 "contain the previous page visited by the browser."
129 "This way, the server can know what link did you follow "
130 "when accessing that page. However, this behaviour can "
131 "unfortunately considerably affect privacy and can lead even "
132 "to a security problem on some badly designed web pages.")),
134 INIT_OPT_INT("protocol.http.referer", N_("Policy"),
135 "policy", 0,
136 REFERER_NONE, REFERER_TRUE, REFERER_TRUE,
137 N_("Mode of sending HTTP referer:\n"
138 "0 is send no referer\n"
139 "1 is send current URL as referer\n"
140 "2 is send fixed fake referer\n"
141 "3 is send previous URL as referer (correct, but insecure)")),
143 INIT_OPT_STRING("protocol.http.referer", N_("Fake referer URL"),
144 "fake", 0, "",
145 N_("Fake referer to be sent when policy is 2.")),
148 INIT_OPT_STRING("protocol.http", N_("Send Accept-Language header"),
149 "accept_language", 0, "",
150 N_("Send Accept-Language header.")),
152 INIT_OPT_BOOL("protocol.http", N_("Use UI language as Accept-Language"),
153 "accept_ui_language", 0, 1,
154 N_("Request localised versions of documents from web-servers "
155 "(using the Accept-Language header) using the language "
156 "you have configured for ELinks' user-interface (this also "
157 "affects navigator.language ECMAScript value available to "
158 "scripts). Note that some see this as a potential security "
159 "risk because it tells web-masters and the FBI sniffers "
160 "about your language preference.")),
162 /* http://www.eweek.com/c/a/Desktops-and-Notebooks/Intel-Psion-End-Dispute-Concerning-Netbook-Trademark-288875/
163 * responds with "Transfer-Encoding: chunked" and
164 * "Content-Encoding: gzip" but does not compress the first chunk
165 * and the last chunk, causing ELinks to display garbage.
166 * (If User-Agent includes "Gecko" (case sensitive), then
167 * that server correctly compresses the whole stream.)
168 * ELinks should instead report the decompression error (bug 1017)
169 * or perhaps even blacklist the server for compression and retry.
170 * Until that has been implemented, disable compression by default. */
171 INIT_OPT_BOOL("protocol.http", N_("Enable on-the-fly compression"),
172 "compression", 0, 0,
173 N_("If enabled, the capability to receive compressed content "
174 "(gzip and/or bzip2) is announced to the server, which "
175 "usually sends the reply compressed, thus saving some "
176 "bandwidth at slight CPU expense.\n"
177 "\n"
178 "If ELinks displays a incomplete page or garbage, try "
179 "disabling this option. If that helps, there may be a bug in "
180 "the decompression part of ELinks. Please report such bugs.\n"
181 "\n"
182 "If ELinks has been compiled without compression support, "
183 "this option has no effect. To check the supported features, "
184 "see Help -> About.")),
186 INIT_OPT_BOOL("protocol.http", N_("Activate HTTP TRACE debugging"),
187 "trace", 0, 0,
188 N_("If active, all HTTP requests are sent with TRACE as "
189 "their method rather than GET or POST. This is useful for "
190 "debugging of both ELinks and various server-side scripts "
191 "--- the server only returns the client's request back to "
192 "the client verbatim. Note that this type of request may "
193 "not be enabled on all servers.")),
/* The %v, %s, %t and %b escapes in the default value are expanded by
 * subst_user_agent(). */
195 /* OSNews.com is supposed to be relying on the textmode token, at least. */
196 INIT_OPT_STRING("protocol.http", N_("User-agent identification"),
197 "user_agent", 0, "ELinks/%v (textmode; %s; %t-%b)",
198 N_("Change the User Agent ID. That means identification "
199 "string, which is sent to HTTP server when a document is "
200 "requested. The 'textmode' token in the first field is our "
201 "silent attempt to establish this as a standard for new "
202 "textmode user agents, so that the webmasters can have "
203 "just a single uniform test for these if they are e.g. "
204 "pushing some lite version to them automagically.\n"
205 "\n"
206 "Use \" \" if you don't want any User-Agent header to be sent "
207 "at all. URI rewriting rules may still include parameters "
208 "that reveal you are using ELinks.\n"
209 "\n"
210 "%v in the string means ELinks version,\n"
211 "%s in the string means system identification,\n"
212 "%t in the string means size of the terminal,\n"
213 "%b in the string means number of bars displayed by ELinks.")),
216 INIT_OPT_TREE("protocol", N_("HTTPS"),
217 "https", 0,
218 N_("HTTPS-specific options.")),
220 INIT_OPT_TREE("protocol.https", N_("Proxy configuration"),
221 "proxy", 0,
222 N_("HTTPS proxy configuration.")),
224 INIT_OPT_STRING("protocol.https.proxy", N_("Host and port-number"),
225 "host", 0, "",
226 N_("Host and port-number (host:port) of the HTTPS CONNECT "
227 "proxy, or blank. If it's blank, HTTPS_PROXY environment "
228 "variable is checked as well.")),
229 NULL_OPTION_INFO,
232 static void done_http();
234 struct module http_protocol_module = struct_module(
235 /* name: */ N_("HTTP"),
236 /* options: */ http_options,
237 /* hooks: */ NULL,
238 /* submodules: */ NULL,
239 /* data: */ NULL,
240 /* init: */ NULL,
241 /* done: */ done_http
245 static void
246 done_http(void)
248 mem_free_if(proxy_auth.realm);
249 mem_free_if(proxy_auth.nonce);
250 mem_free_if(proxy_auth.opaque);
252 free_blacklist();
254 if (accept_charset)
255 mem_free(accept_charset);
258 static void
259 init_accept_charset(void)
261 struct string ac;
262 unsigned char *cs;
263 int i;
265 if (!init_string(&ac)) return;
267 for (i = 0; (cs = get_cp_mime_name(i)); i++) {
268 if (ac.length) {
269 add_to_string(&ac, ", ");
270 } else {
271 add_to_string(&ac, "Accept-Charset: ");
273 add_to_string(&ac, cs);
276 if (ac.length) {
277 add_crlf_to_string(&ac);
280 accept_charset = squeezastring(&ac);
282 done_string(&ac);
286 unsigned char *
287 subst_user_agent(unsigned char *fmt, unsigned char *version,
288 unsigned char *sysname, unsigned char *termsize)
290 struct string agent;
292 if (!init_string(&agent)) return NULL;
294 while (*fmt) {
295 int p;
297 for (p = 0; fmt[p] && fmt[p] != '%'; p++);
299 add_bytes_to_string(&agent, fmt, p);
300 fmt += p;
302 if (*fmt != '%') continue;
304 fmt++;
305 switch (*fmt) {
306 case 'b':
307 if (!list_empty(sessions)) {
308 unsigned char bs[4] = "";
309 int blen = 0;
310 struct session *ses = sessions.prev;
311 int bars = ses->status.show_status_bar
312 + ses->status.show_tabs_bar
313 + ses->status.show_title_bar;
315 ulongcat(bs, &blen, bars, 2, 0);
316 add_to_string(&agent, bs);
318 break;
319 case 'v':
320 add_to_string(&agent, version);
321 break;
322 case 's':
323 add_to_string(&agent, sysname);
324 break;
325 case 't':
326 if (termsize)
327 add_to_string(&agent, termsize);
328 break;
329 default:
330 add_bytes_to_string(&agent, fmt - 1, 2);
331 break;
333 if (*fmt) fmt++;
336 return agent.source;
/* Append the @components of @uri to @header, rewriting characters that
 * must not appear raw in the request line: space, tab, CR and LF each
 * become "%20" and a backslash becomes '/'.  Nothing is appended when
 * get_uri_string() returns NULL.
 *
 * This is admittedly a late place to do the substitution; ideally URLs
 * would be encoded as early as possible and decoded again by the
 * protocol backends where needed. --pasky */
static void
add_url_to_http_string(struct string *header, struct uri *uri, int components)
{
	unsigned char *url = get_uri_string(uri, components);
	unsigned char *pos;

	if (!url) return;

	for (pos = url; *pos; pos++) {
		/* Length of the run that can be copied verbatim. */
		int run = strcspn(pos, " \t\r\n\\");

		add_bytes_to_string(header, pos, run);
		pos += run;

		if (!*pos) break;

		if (*pos == '\\') {
			add_char_to_string(header, '/');
		} else {
			add_to_string(header, "%20");
		}
	}

	mem_free(url);
}
/* Convert the decimal digits in the range [@start, @end) to an integer
 * and store it in *@value.
 *
 * Returns 0 on success.  Returns -1, leaving *@value untouched, when
 * the range contains a non-digit or is empty (@end <= @start); an empty
 * range used to make the function read the byte before @start.
 *
 * There is no overflow check; the callers in this file never pass more
 * than 4 digits. */
static int
revstr2num(unsigned char *start, unsigned char *end, int *value)
{
	unsigned char *pos;
	int val = 0;

	if (end <= start) return -1; /* Nothing to parse. */

	for (pos = start; pos < end; pos++) {
		if (!isdigit(*pos)) return -1; /* NaN */
		val = val * 10 + (*pos - '0');
	}

	*value = val;
	return 0;
}
390 /* This function extracts code, major and minor version from string
391 * "\s*HTTP/\d+.\d+\s+\d\d\d..."
392 * It returns a negative value on error, 0 on success.
394 static int
395 get_http_code(struct read_buffer *rb, int *code, struct http_version *version)
397 unsigned char *head = rb->data;
398 unsigned char *start;
400 *code = 0;
401 version->major = 0;
402 version->minor = 0;
404 /* Ignore spaces. */
405 while (*head == ' ') head++;
407 /* HTTP/ */
408 if (c_toupper(*head) != 'H' || c_toupper(*++head) != 'T' ||
409 c_toupper(*++head) != 'T' || c_toupper(*++head) != 'P'
410 || *++head != '/')
411 return -1;
413 /* Version */
414 start = ++head;
415 /* Find next '.' */
416 while (*head && *head != '.') head++;
417 /* Sanity check. */
418 if (!*head || !(head - start)
419 || (head - start) > 4
420 || !isdigit(*(head + 1)))
421 return -2;
423 /* Extract major version number. */
424 if (revstr2num(start, head, &version->major)) return -3; /* NaN */
426 start = head + 1;
428 /* Find next ' '. */
429 while (*head && *head != ' ') head++;
430 /* Sanity check. */
431 if (!*head || !(head - start) || (head - start) > 4) return -4;
433 /* Extract minor version number. */
434 if (revstr2num(start, head, &version->minor)) return -5; /* NaN */
436 /* Ignore spaces. */
437 while (*head == ' ') head++;
439 /* Sanity check for code. */
440 if (head[0] < '1' || head[0] > '9' ||
441 !isdigit(head[1]) ||
442 !isdigit(head[2]))
443 return -6; /* Invalid code. */
445 /* Extract code. */
446 *code = (head[0] - '0') * 100 + (head[1] - '0') * 10 + head[2] - '0';
448 return 0;
451 static int
452 check_http_server_bugs(struct uri *uri, struct http_connection_info *http,
453 unsigned char *head)
455 unsigned char *server;
456 const unsigned char *const *s;
457 static const unsigned char *const buggy_servers[] = {
458 "mod_czech/3.1.0",
459 "Purveyor",
460 "Netscape-Enterprise",
461 NULL
464 if (!get_opt_bool("protocol.http.bugs.allow_blacklist", NULL)
465 || HTTP_1_0(http->sent_version))
466 return 0;
468 server = parse_header(head, "Server", NULL);
469 if (!server)
470 return 0;
472 for (s = buggy_servers; *s; s++) {
473 if (strstr(server, *s)) {
474 add_blacklist_entry(uri, SERVER_BLACKLIST_HTTP10);
475 break;
479 mem_free(server);
480 return (*s != NULL);
483 static void
484 http_end_request(struct connection *conn, struct connection_state state,
485 int notrunc)
487 struct http_connection_info *http;
489 shutdown_connection_stream(conn);
491 /* shutdown_connection_stream() should not change conn->info,
492 * but in case it does, read conn->info only after the call. */
493 http = conn->info;
494 if (http)
495 done_http_post(&http->post);
497 if (http && !http->close
498 && (!conn->socket->ssl) /* We won't keep alive ssl connections */
499 && (!get_opt_bool("protocol.http.bugs.post_no_keepalive", NULL)
500 || !conn->uri->post)) {
501 if (is_in_state(state, S_OK) && conn->cached)
502 normalize_cache_entry(conn->cached, !notrunc ? conn->from : -1);
503 set_connection_state(conn, state);
504 add_keepalive_connection(conn, HTTP_KEEPALIVE_TIMEOUT, NULL);
505 } else {
506 abort_connection(conn, state);
510 static void http_send_header(struct socket *);
512 void
513 http_protocol_handler(struct connection *conn)
515 /* setcstate(conn, S_CONN); */
517 if (!has_keepalive_connection(conn)) {
518 make_connection(conn->socket, conn->uri, http_send_header,
519 conn->cache_mode >= CACHE_MODE_FORCE_RELOAD);
520 } else {
521 http_send_header(conn->socket);
/* Protocol handler for proxy URIs.  It does nothing of its own and just
 * hands @conn to http_protocol_handler(). */
525 void
526 proxy_protocol_handler(struct connection *conn)
528 http_protocol_handler(conn);
/* True when the struct uri pointed to by @x has the proxy protocol. */
531 #define IS_PROXY_URI(x) ((x)->protocol == PROTOCOL_PROXY)
/* True when @conn goes through a proxy (conn->uri is a proxy URI) and
 * the proxied URI is an HTTPS one. */
533 #define connection_is_https_proxy(conn) \
534 (IS_PROXY_URI((conn)->uri) && (conn)->proxied_uri->protocol == PROTOCOL_HTTPS)
536 /** connection.done points to this function if connection.info points
537 * to a struct http_connection_info. */
538 static void
539 done_http_connection(struct connection *conn)
541 struct http_connection_info *http = conn->info;
543 done_http_post(&http->post);
544 mem_free(http);
545 conn->info = NULL;
546 conn->done = NULL;
549 struct http_connection_info *
550 init_http_connection_info(struct connection *conn, int major, int minor, int close)
552 struct http_connection_info *http;
554 http = mem_calloc(1, sizeof(*http));
555 if (!http) {
556 http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
557 return NULL;
560 http->sent_version.major = major;
561 http->sent_version.minor = minor;
562 http->close = close;
564 init_http_post(&http->post);
566 /* The CGI code uses this too and blacklisting expects a host name. */
567 if (conn->proxied_uri->protocol != PROTOCOL_FILE)
568 http->bl_flags = get_blacklist_flags(conn->proxied_uri);
570 if (http->bl_flags & SERVER_BLACKLIST_HTTP10
571 || get_opt_bool("protocol.http.bugs.http10", NULL)) {
572 http->sent_version.major = 1;
573 http->sent_version.minor = 0;
576 /* If called from HTTPS proxy connection the connection info might have
577 * already been allocated. */
578 if (conn->done) {
579 conn->done(conn);
580 conn->done = NULL;
582 mem_free_set(&conn->info, http);
583 conn->done = done_http_connection;
585 return http;
/* Append an "Accept-Encoding: ..." line to @header that lists the
 * encodings compiled in: "bzip2" (CONFIG_BZIP2), "deflate, gzip"
 * (CONFIG_GZIP) and "lzma" (CONFIG_LZMA), separated by ", ".  When none
 * of these is configured, nothing at all is appended. */
static void
accept_encoding_header(struct string *header)
{
#if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA)
	/* Set once any encoding has been written, so that the next one
	 * is preceded by a separator. */
	int need_separator = 0;

	add_to_string(header, "Accept-Encoding: ");

#ifdef CONFIG_BZIP2
	add_to_string(header, "bzip2");
	need_separator = 1;
#endif

#ifdef CONFIG_GZIP
	if (need_separator) {
		add_to_string(header, ", ");
	}
	add_to_string(header, "deflate, gzip");
	need_separator = 1;
#endif

#ifdef CONFIG_LZMA
	if (need_separator) {
		add_to_string(header, ", ");
	}
	add_to_string(header, "lzma");
#endif

	add_crlf_to_string(header);
#endif
}
615 #define POST_BUFFER_SIZE 16384
617 static void
618 send_more_post_data(struct socket *socket)
620 struct connection *conn = socket->conn;
621 struct http_connection_info *http = conn->info;
622 unsigned char buffer[POST_BUFFER_SIZE];
623 int got;
624 struct connection_state error;
626 got = read_http_post(&http->post, buffer, POST_BUFFER_SIZE, &error);
627 if (got < 0) {
628 http_end_request(conn, error, 0);
629 } else if (got > 0) {
630 write_to_socket(socket, buffer, got, connection_state(S_TRANS),
631 send_more_post_data);
632 } else { /* got == 0, meaning end of data */
633 /* Can't use request_from_socket() because there's no
634 * more data to write. */
635 struct read_buffer *rb = alloc_read_buffer(socket);
637 socket->state = SOCKET_END_ONCLOSE;
638 if (rb)
639 read_from_socket(socket, rb, connection_state(S_SENT),
640 http_got_header);
641 else
642 http_end_request(conn, connection_state(S_OUT_OF_MEM),
/* Build the HTTP request header for the connection owning @socket and
 * start sending it.
 *
 * The request is assembled in a struct string: request line, Host,
 * optional Proxy-Authorization, User-Agent, Referer, Accept*,
 * (Proxy-)Connection, cache validators, Range, Authorization,
 * Content-Type/Content-Length for POST, and cookies.  For CONNECT
 * requests to an HTTPS proxy only a subset is sent; see the "CONNECT:"
 * comments.
 *
 * The request ends early with S_BAD_URL when the proxied URI has no
 * host, with S_OUT_OF_MEM when the header string cannot be initialised,
 * and with the state reported by open_http_post() when the POST data
 * cannot be opened.
 *
 * With POST data the header is written with write_to_socket() and
 * send_more_post_data() continues with the body; otherwise
 * request_from_socket() sends the header and hands the response to
 * http_got_header(). */
649 static void
650 http_send_header(struct socket *socket)
652 struct connection *conn = socket->conn;
653 struct http_connection_info *http;
654 int trace = get_opt_bool("protocol.http.trace", NULL);
655 struct string header;
656 unsigned char *post_data = NULL;
657 struct auth_entry *entry = NULL;
658 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
659 unsigned char *optstr;
660 int use_connect, talking_to_proxy;
662 /* Sanity check for a host */
663 if (!uri || !uri->host || !*uri->host || !uri->hostlen) {
664 http_end_request(conn, connection_state(S_BAD_URL), 0);
665 return;
/* Request HTTP/1.1 with the close flag unset;
 * init_http_connection_info() may downgrade the version to 1.0. */
668 http = init_http_connection_info(conn, 1, 1, 0);
669 if (!http) return;
671 if (!init_string(&header)) {
672 http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
673 return;
676 if (!conn->cached) conn->cached = find_in_cache(uri);
/* Both flags require that the socket is not (yet) using SSL. */
678 talking_to_proxy = IS_PROXY_URI(conn->uri) && !conn->socket->ssl;
679 use_connect = connection_is_https_proxy(conn) && !conn->socket->ssl;
/* Request method: TRACE takes precedence over CONNECT, POST and GET. */
681 if (trace) {
682 add_to_string(&header, "TRACE ");
683 } else if (use_connect) {
684 add_to_string(&header, "CONNECT ");
685 /* In CONNECT requests, we send only a subset of the
686 * headers to the proxy. See the "CONNECT:" comments
687 * below. After the CONNECT request succeeds, we
688 * negotiate TLS with the real server and make a new
689 * HTTP request that includes all the headers. */
690 } else if (uri->post) {
691 add_to_string(&header, "POST ");
692 conn->unrestartable = 1;
693 } else {
694 add_to_string(&header, "GET ");
/* Request target: a leading '/' is added unless talking to a proxy. */
697 if (!talking_to_proxy) {
698 add_char_to_string(&header, '/');
701 if (use_connect) {
702 /* Add port if it was specified or the default port */
703 add_uri_to_string(&header, uri, URI_HTTP_CONNECT);
704 } else {
705 if (connection_is_https_proxy(conn) && conn->socket->ssl) {
706 add_url_to_http_string(&header, uri, URI_DATA);
708 } else if (talking_to_proxy) {
709 add_url_to_http_string(&header, uri, URI_PROXY);
711 } else {
712 add_url_to_http_string(&header, conn->uri, URI_DATA);
716 add_to_string(&header, " HTTP/");
717 add_long_to_string(&header, http->sent_version.major);
718 add_char_to_string(&header, '.');
719 add_long_to_string(&header, http->sent_version.minor);
720 add_crlf_to_string(&header);
722 /* CONNECT: Sending a Host header seems pointless as the same
723 * information is already in the CONNECT line. It's harmless
724 * though and Mozilla does it too. */
725 add_to_string(&header, "Host: ");
726 add_uri_to_string(&header, uri, URI_HTTP_HOST);
727 add_crlf_to_string(&header);
729 /* CONNECT: Proxy-Authorization is intended to be seen by the proxy. */
730 if (talking_to_proxy) {
731 unsigned char *user = get_opt_str("protocol.http.proxy.user", NULL);
732 unsigned char *passwd = get_opt_str("protocol.http.proxy.passwd", NULL);
734 if (proxy_auth.digest) {
735 unsigned char *response;
/* Copy the configured credentials into proxy_auth, truncated to fit
 * the fixed-size buffers and always NUL-terminated. */
736 int userlen = int_min(strlen(user), AUTH_USER_MAXLEN - 1);
737 int passwordlen = int_min(strlen(passwd), AUTH_PASSWORD_MAXLEN - 1);
739 if (userlen)
740 memcpy(proxy_auth.user, user, userlen);
741 proxy_auth.user[userlen] = '\0';
742 if (passwordlen)
743 memcpy(proxy_auth.password, passwd, passwordlen);
744 proxy_auth.password[passwordlen] = '\0';
746 /* FIXME: @uri is the proxied URI. Maybe the passed URI
747 * should be the proxy URI aka conn->uri. --jonas */
748 response = get_http_auth_digest_response(&proxy_auth, uri);
749 if (response) {
750 add_to_string(&header, "Proxy-Authorization: Digest ");
751 add_to_string(&header, response);
752 add_crlf_to_string(&header);
754 mem_free(response);
757 } else {
/* Basic scheme: base64 of "user:passwd", sent only when a user name
 * is configured. */
758 if (user[0]) {
759 unsigned char *proxy_data;
761 proxy_data = straconcat(user, ":", passwd, (unsigned char *) NULL);
762 if (proxy_data) {
763 unsigned char *proxy_64 = base64_encode(proxy_data);
765 if (proxy_64) {
766 add_to_string(&header, "Proxy-Authorization: Basic ");
767 add_to_string(&header, proxy_64);
768 add_crlf_to_string(&header);
769 mem_free(proxy_64);
771 mem_free(proxy_data);
777 /* CONNECT: User-Agent does not reveal anything about the
778 * resource we're fetching, and it may help the proxy return
779 * better error messages. */
780 optstr = get_opt_str("protocol.http.user_agent", NULL);
/* An empty option or a single space suppresses the User-Agent header. */
781 if (*optstr && strcmp(optstr, " ")) {
782 unsigned char *ustr, ts[64] = "";
783 /* TODO: Somehow get the terminal in which the
784 * document will actually be displayed. */
785 struct terminal *term = get_default_terminal();
787 add_to_string(&header, "User-Agent: ");
789 if (term) {
790 unsigned int tslen = 0;
/* Format the terminal size as "<width>x<height>" for the %t escape. */
792 ulongcat(ts, &tslen, term->width, 3, 0);
793 ts[tslen++] = 'x';
794 ulongcat(ts, &tslen, term->height, 3, 0);
796 ustr = subst_user_agent(optstr, VERSION_STRING, system_name,
797 ts);
799 if (ustr) {
800 add_to_string(&header, ustr);
801 mem_free(ustr);
804 add_crlf_to_string(&header);
807 /* CONNECT: Referer probably is a secret page in the HTTPS
808 * server, so don't reveal it to the proxy. */
809 if (!use_connect) {
810 switch (get_opt_int("protocol.http.referer.policy", NULL)) {
811 case REFERER_NONE:
812 /* oh well */
813 break;
815 case REFERER_FAKE:
816 optstr = get_opt_str("protocol.http.referer.fake", NULL);
817 if (!optstr[0]) break;
818 add_to_string(&header, "Referer: ");
819 add_to_string(&header, optstr);
820 add_crlf_to_string(&header);
821 break;
823 case REFERER_TRUE:
824 if (!conn->referrer) break;
825 add_to_string(&header, "Referer: ");
826 add_url_to_http_string(&header, conn->referrer, URI_HTTP_REFERRER);
827 add_crlf_to_string(&header);
828 break;
830 case REFERER_SAME_URL:
831 add_to_string(&header, "Referer: ");
832 add_url_to_http_string(&header, uri, URI_HTTP_REFERRER);
833 add_crlf_to_string(&header);
834 break;
838 /* CONNECT: Do send all Accept* headers to the CONNECT proxy,
839 * because they do not reveal anything about the resource
840 * we're going to request via TLS, and they may affect the
841 * error message if the CONNECT request fails.
843 * If ELinks is ever changed to vary its Accept headers based
844 * on what it intends to do with the returned resource, e.g.
845 * sending "Accept: text/css" when it wants an external
846 * stylesheet, then it should do that only in the inner GET
847 * and not in the outer CONNECT. */
848 add_to_string(&header, "Accept: */*");
849 add_crlf_to_string(&header);
851 if (get_opt_bool("protocol.http.compression", NULL))
852 accept_encoding_header(&header);
/* The Accept-Charset line is built lazily on the first request. */
854 if (!accept_charset) {
855 init_accept_charset();
858 if (!(http->bl_flags & SERVER_BLACKLIST_NO_CHARSET)
859 && !get_opt_bool("protocol.http.bugs.accept_charset", NULL)
860 && accept_charset) {
861 add_to_string(&header, accept_charset);
/* An explicit accept_language option wins over the UI language. */
864 optstr = get_opt_str("protocol.http.accept_language", NULL);
865 if (optstr[0]) {
866 add_to_string(&header, "Accept-Language: ");
867 add_to_string(&header, optstr);
868 add_crlf_to_string(&header);
870 #ifdef CONFIG_NLS
871 else if (get_opt_bool("protocol.http.accept_ui_language", NULL)) {
872 unsigned char *code = language_to_iso639(current_language);
874 if (code) {
875 add_to_string(&header, "Accept-Language: ");
876 add_to_string(&header, code);
877 add_crlf_to_string(&header);
880 #endif
882 /* CONNECT: Proxy-Connection is intended to be seen by the
883 * proxy. If the CONNECT request succeeds, then the proxy
884 * will forward the remainder of the TCP connection to the
885 * origin server, and Proxy-Connection does not matter; but
886 * if the request fails, then Proxy-Connection may matter. */
887 /* FIXME: What about post-HTTP/1.1?? --Zas */
888 if (HTTP_1_1(http->sent_version)) {
889 if (!IS_PROXY_URI(conn->uri)) {
890 add_to_string(&header, "Connection: ");
891 } else {
892 add_to_string(&header, "Proxy-Connection: ");
895 if (!uri->post || !get_opt_bool("protocol.http.bugs.post_no_keepalive", NULL)) {
896 add_to_string(&header, "Keep-Alive");
897 } else {
898 add_to_string(&header, "close");
900 add_crlf_to_string(&header);
903 /* CONNECT: Do not tell the proxy anything we have cached
904 * about the resource. */
905 if (!use_connect && conn->cached) {
906 if (!conn->cached->incomplete && conn->cached->head
907 && conn->cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED) {
908 if (conn->cached->last_modified) {
909 add_to_string(&header, "If-Modified-Since: ");
910 add_to_string(&header, conn->cached->last_modified);
911 add_crlf_to_string(&header);
913 if (conn->cached->etag) {
914 add_to_string(&header, "If-None-Match: ");
915 add_to_string(&header, conn->cached->etag);
916 add_crlf_to_string(&header);
921 /* CONNECT: Let's send cache control headers to the proxy too;
922 * they may affect DNS caching. */
923 if (conn->cache_mode >= CACHE_MODE_FORCE_RELOAD) {
924 add_to_string(&header, "Pragma: no-cache");
925 add_crlf_to_string(&header);
926 add_to_string(&header, "Cache-Control: no-cache");
927 add_crlf_to_string(&header);
930 /* CONNECT: Do not reveal byte ranges to the proxy. It can't
931 * do anything good with that information anyway. */
932 if (!use_connect && (conn->from || conn->progress->start > 0)) {
933 /* conn->from takes precedence. conn->progress.start is set only the first
934 * time, then conn->from gets updated and in case of any retries
935 * etc we have everything interesting in conn->from already. */
936 add_to_string(&header, "Range: bytes=");
937 add_long_to_string(&header, conn->from ? conn->from : conn->progress->start);
938 add_char_to_string(&header, '-');
939 add_crlf_to_string(&header);
942 /* CONNECT: The Authorization header is for the origin server only. */
943 if (!use_connect) {
/* With CONFIG_GSSAPI, find_auth() is consulted only when
 * http_negotiate_output() returns non-zero. */
944 #ifdef CONFIG_GSSAPI
945 if (http_negotiate_output(uri, &header) != 0)
946 #endif
947 entry = find_auth(uri);
950 if (entry) {
951 if (entry->digest) {
952 unsigned char *response;
954 response = get_http_auth_digest_response(entry, uri);
955 if (response) {
956 add_to_string(&header, "Authorization: Digest ");
957 add_to_string(&header, response);
958 add_crlf_to_string(&header);
960 mem_free(response);
963 } else {
964 /* RFC2617 section 2 [Basic Authentication Scheme]
966 * To receive authorization, the client sends the userid
967 * and password, separated by a single colon (":")
968 * character, within a base64 [7] encoded string in the
969 * credentials. */
970 unsigned char *id;
972 /* Create base64 encoded string. */
973 id = straconcat(entry->user, ":", entry->password,
974 (unsigned char *) NULL);
975 if (id) {
976 unsigned char *base64 = base64_encode(id);
/* Replace the plain "user:password" string with its base64 form. */
978 mem_free_set(&id, base64);
981 if (id) {
982 add_to_string(&header, "Authorization: Basic ");
983 add_to_string(&header, id);
984 add_crlf_to_string(&header);
985 mem_free(id);
990 /* CONNECT: Any POST data is for the origin server only. */
991 if (!use_connect && uri->post) {
992 /* We search for first '\n' in uri->post to get content type
993 * as set by get_form_uri(). This '\n' is dropped if any
994 * and replaced by correct '\r\n' termination here. */
995 unsigned char *postend = strchr(uri->post, '\n');
996 struct connection_state error;
998 if (postend) {
999 add_to_string(&header, "Content-Type: ");
1000 add_bytes_to_string(&header, uri->post, postend - uri->post);
1001 add_crlf_to_string(&header);
1004 post_data = postend ? postend + 1 : uri->post;
1005 if (!open_http_post(&http->post, post_data, &error)) {
1006 http_end_request(conn, error, 0);
1007 done_string(&header);
1008 return;
1010 add_format_to_string(&header, "Content-Length: "
1011 "%" OFF_PRINT_FORMAT "\x0D\x0A",
1012 (off_print_T)
1013 http->post.total_upload_length);
1016 #ifdef CONFIG_COOKIES
1017 /* CONNECT: Cookies are for the origin server only. */
1018 if (!use_connect) {
1019 struct string *cookies = send_cookies(uri);
1021 if (cookies) {
1022 add_to_string(&header, "Cookie: ");
1023 add_string_to_string(&header, cookies);
1024 add_crlf_to_string(&header);
1025 done_string(cookies);
1028 #endif
/* The empty line that terminates the header section. */
1030 add_crlf_to_string(&header);
1032 /* CONNECT: Any POST data is for the origin server only.
1033 * This was already checked above and post_data is NULL
1034 * in that case. Verified with an assertion below. */
1035 if (post_data) {
1036 assert(!use_connect); /* see comment above */
1038 socket->state = SOCKET_END_ONCLOSE;
1039 if (!conn->http_upload_progress && http->post.file_count)
1040 conn->http_upload_progress = init_progress(0);
1041 write_to_socket(socket, header.source, header.length,
1042 connection_state(S_TRANS),
1043 send_more_post_data);
1044 } else
1045 request_from_socket(socket, header.source, header.length,
1046 connection_state(S_SENT),
1047 SOCKET_END_ONCLOSE, http_got_header);
1048 done_string(&header);
1051 #undef POST_BUFFER_SIZE
1054 /* This function decompresses the data block given in @data (if it was
1055 * compressed), which is long @len bytes. The decompressed data block is given
1056 * back to the world as the return value and its length is stored into
1057 * @new_len. After this function returns, the caller will discard all the @len
1058 * input bytes, so this function must use all of them unless an error occurs.
1060 * In this function, value of either http->chunk_remaining or http->length is
1061 * being changed (it depends on if chunked mode is used or not).
1063 * Note that the function is still a little esotheric for me. Don't take it
1064 * lightly and don't mess with it without grave reason! If you dare to touch
1065 * this without testing the changes on slashdot, freshmeat and cvsweb
1066 * (including revision history), don't dare to send me any patches! ;) --pasky
1068 * This function gotta die. */
1069 static unsigned char *
1070 decompress_data(struct connection *conn, unsigned char *data, int len,
1071 int *new_len)
1073 struct http_connection_info *http = conn->info;
1074 enum { NORMAL, FINISHING } state = NORMAL;
1075 int *length_of_block;
1076 unsigned char *output = NULL;
1078 if (http->length == LEN_CHUNKED) {
1079 if (http->chunk_remaining == CHUNK_ZERO_SIZE)
1080 state = FINISHING;
1081 length_of_block = &http->chunk_remaining;
1082 } else {
1083 length_of_block = &http->length;
1084 if (!*length_of_block) {
1085 /* Going to finish this decoding bussiness. */
1086 state = FINISHING;
1090 *new_len = 0; /* new_len must be zero if we would ever return NULL */
1092 if (!conn->stream) {
1093 conn->stream = open_encoded(-1, conn->content_encoding);
1094 if (!conn->stream) return NULL;
1097 output = decode_encoded_buffer(conn->stream, conn->content_encoding, data, len, new_len);
1099 if (*length_of_block > 0) {
1100 *length_of_block -= len;
1102 /* http->length is 0 at the end of block for all modes: keep-alive,
1103 * non-keep-alive and chunked */
1104 if (!http->length) {
1105 /* That's all, folks - let's finish this. */
1106 state = FINISHING;
1109 if (state == FINISHING) shutdown_connection_stream(conn);
1110 return output;
1113 static int
1114 is_line_in_buffer(struct read_buffer *rb)
1116 int l;
1118 for (l = 0; l < rb->length; l++) {
1119 unsigned char a0 = rb->data[l];
1121 if (a0 == ASCII_LF)
1122 return l + 1;
1123 if (a0 == ASCII_CR) {
1124 if (rb->data[l + 1] == ASCII_LF
1125 && l < rb->length - 1)
1126 return l + 2;
1127 if (l == rb->length - 1)
1128 return 0;
1130 if (a0 < ' ')
1131 return -1;
1133 return 0;
1136 static void read_http_data(struct socket *socket, struct read_buffer *rb);
1138 static void
1139 read_more_http_data(struct connection *conn, struct read_buffer *rb,
1140 int already_got_anything)
1142 struct connection_state state = already_got_anything
1143 ? connection_state(S_TRANS) : conn->state;
1145 read_from_socket(conn->socket, rb, state, read_http_data);
1148 static void
1149 read_http_data_done(struct connection *conn)
1151 struct http_connection_info *http = conn->info;
1153 /* There's no content but an error so just print
1154 * that instead of nothing. */
1155 if (!conn->from) {
1156 if (http->code >= 400) {
1157 http_error_document(conn, http->code);
1159 } else {
1160 /* This is not an error, thus fine. No need generate any
1161 * document, as this may be empty and it's not a problem.
1162 * In case of 3xx, we're probably just getting kicked to
1163 * another page anyway. And in case of 2xx, the document
1164 * may indeed be empty and thus the user should see it so. */
1168 http_end_request(conn, connection_state(S_OK), 0);
1171 /* Returns:
1172 * -1 on error
1173 * 0 if more to read
1174 * 1 if done
1176 static int
1177 read_chunked_http_data(struct connection *conn, struct read_buffer *rb)
1179 struct http_connection_info *http = conn->info;
1180 int total_data_len = 0;
1182 while (1) {
1183 /* Chunked. Good luck! */
1184 /* See RFC2616, section 3.6.1. Basically, it looks like:
1185 * 1234 ; a = b ; c = d\r\n
1186 * aklkjadslkfjalkfjlkajkljfdkljdsfkljdf*1234\r\n
1187 * 0\r\n
1188 * \r\n */
1189 if (http->chunk_remaining == CHUNK_DATA_END) {
1190 int l = is_line_in_buffer(rb);
1192 if (l) {
1193 if (l == -1) {
1194 /* Invalid character in buffer. */
1195 return -1;
1198 /* Remove everything to the EOLN. */
1199 kill_buffer_data(rb, l);
1200 if (l <= 2) {
1201 /* Empty line. */
1202 return 2;
1204 continue;
1207 } else if (http->chunk_remaining == CHUNK_SIZE) {
1208 int l = is_line_in_buffer(rb);
1210 if (l) {
1211 unsigned char *de;
1212 int n = 0;
1214 if (l != -1) {
1215 errno = 0;
1216 n = strtol(rb->data, (char **) &de, 16);
1217 if (errno || !*de) {
1218 return -1;
1222 if (l == -1 || de == rb->data) {
1223 return -1;
1226 /* Remove everything to the EOLN. */
1227 kill_buffer_data(rb, l);
1228 http->chunk_remaining = n;
1229 if (!http->chunk_remaining)
1230 http->chunk_remaining = CHUNK_ZERO_SIZE;
1231 continue;
1234 } else {
1235 int data_len;
1236 int zero = (http->chunk_remaining == CHUNK_ZERO_SIZE);
1237 int len = zero ? 0 : http->chunk_remaining;
1239 /* Maybe everything necessary didn't come yet.. */
1240 int_upper_bound(&len, rb->length);
1241 conn->received += len;
1243 if (conn->content_encoding == ENCODING_NONE) {
1244 data_len = len;
1245 if (http->chunk_remaining > 0) http->chunk_remaining -= len;
1246 if (add_fragment(conn->cached, conn->from, rb->data, len) == 1)
1247 conn->tries = 0;
1248 } else {
1249 unsigned char *data = decompress_data(conn, rb->data, len, &data_len);
1251 if (add_fragment(conn->cached, conn->from, data, data_len) == 1)
1252 conn->tries = 0;
1254 if (data) mem_free(data);
1257 conn->from += data_len;
1258 total_data_len += data_len;
1260 kill_buffer_data(rb, len);
1262 if (zero) {
1263 /* Last chunk has zero length, so this is last
1264 * chunk, we finished decompression just now
1265 * and now we can happily finish reading this
1266 * stuff. */
1267 http->chunk_remaining = CHUNK_DATA_END;
1268 continue;
1271 if (!http->chunk_remaining && rb->length > 0) {
1272 /* Eat newline succeeding each chunk. */
1273 if (rb->data[0] == ASCII_LF) {
1274 kill_buffer_data(rb, 1);
1275 } else {
1276 if (rb->data[0] != ASCII_CR
1277 || (rb->length >= 2
1278 && rb->data[1] != ASCII_LF)) {
1279 return -1;
1281 if (rb->length < 2) break;
1282 kill_buffer_data(rb, 2);
1284 http->chunk_remaining = CHUNK_SIZE;
1285 continue;
1288 break;
1291 /* More to read. */
1292 return !!total_data_len;
1295 /* Returns 0 if more data, 1 if done. */
1296 static int
1297 read_normal_http_data(struct connection *conn, struct read_buffer *rb)
1299 struct http_connection_info *http = conn->info;
1300 int data_len;
1301 int len = rb->length;
1303 if (http->length >= 0 && http->length < len) {
1304 /* We won't read more than we have to go. */
1305 len = http->length;
1308 conn->received += len;
1310 if (conn->content_encoding == ENCODING_NONE) {
1311 data_len = len;
1312 if (http->length > 0) http->length -= len;
1313 if (add_fragment(conn->cached, conn->from, rb->data, data_len) == 1)
1314 conn->tries = 0;
1315 } else {
1316 unsigned char *data = decompress_data(conn, rb->data, len, &data_len);
1318 if (add_fragment(conn->cached, conn->from, data, data_len) == 1)
1319 conn->tries = 0;
1321 if (data) mem_free(data);
1324 conn->from += data_len;
1326 kill_buffer_data(rb, len);
1328 if (!http->length && (conn->socket->state == SOCKET_RETRY_ONCLOSE
1329 || conn->socket->state == SOCKET_CLOSED)) {
1330 return 2;
1333 return !!data_len;
1336 static void
1337 read_http_data(struct socket *socket, struct read_buffer *rb)
1339 struct connection *conn = socket->conn;
1340 struct http_connection_info *http = conn->info;
1341 int ret;
1343 if (socket->state == SOCKET_CLOSED) {
1344 if (conn->content_encoding) {
1345 /* Flush decompression first. */
1346 http->length = 0;
1347 } else {
1348 read_http_data_done(conn);
1349 return;
1353 if (http->length != LEN_CHUNKED) {
1354 ret = read_normal_http_data(conn, rb);
1356 } else {
1357 ret = read_chunked_http_data(conn, rb);
1360 switch (ret) {
1361 case 0:
1362 read_more_http_data(conn, rb, 0);
1363 break;
1364 case 1:
1365 read_more_http_data(conn, rb, 1);
1366 break;
1367 case 2:
1368 read_http_data_done(conn);
1369 break;
1370 default:
1371 assertm(ret == -1, "Unexpected return value: %d", ret);
1372 abort_connection(conn, connection_state(S_HTTP_ERROR));
1376 /* Returns offset of the header end, zero if more data is needed, -1 when
1377 * incorrect data was received, -2 if this is HTTP/0.9 and no header is to
1378 * come. */
1379 static int
1380 get_header(struct read_buffer *rb)
1382 int i;
1384 /* XXX: We will have to do some guess about whether an HTTP header is
1385 * coming or not, in order to support HTTP/0.9 reply correctly. This
1386 * means a little code duplication with get_http_code(). --pasky */
1387 if (rb->length > 4 && c_strncasecmp(rb->data, "HTTP/", 5))
1388 return -2;
1390 for (i = 0; i < rb->length; i++) {
1391 unsigned char a0 = rb->data[i];
1392 unsigned char a1 = rb->data[i + 1];
1394 if (a0 == 0) {
1395 rb->data[i] = ' ';
1396 continue;
1398 if (a0 == ASCII_LF && a1 == ASCII_LF
1399 && i < rb->length - 1)
1400 return i + 2;
1401 if (a0 == ASCII_CR && i < rb->length - 3) {
1402 if (a1 == ASCII_CR) continue;
1403 if (a1 != ASCII_LF) return -1;
1404 if (rb->data[i + 2] == ASCII_CR) {
1405 if (rb->data[i + 3] != ASCII_LF) return -1;
1406 return i + 4;
1411 return 0;
1414 /* returns 1 if we need retry the connection (for negotiate-auth only) */
1415 static int
1416 check_http_authentication(struct connection *conn, struct uri *uri,
1417 unsigned char *header, unsigned char *header_field)
1419 unsigned char *str, *d;
1420 int ret = 0;
1422 d = parse_header(header, header_field, &str);
1423 while (d) {
1424 if (!c_strncasecmp(d, "Basic", 5)) {
1425 unsigned char *realm = get_header_param(d, "realm");
1427 if (realm) {
1428 add_auth_entry(uri, realm, NULL, NULL, 0);
1429 mem_free(realm);
1430 mem_free(d);
1431 break;
1433 } else if (!c_strncasecmp(d, "Digest", 6)) {
1434 unsigned char *realm = get_header_param(d, "realm");
1435 unsigned char *nonce = get_header_param(d, "nonce");
1436 unsigned char *opaque = get_header_param(d, "opaque");
1438 add_auth_entry(uri, realm, nonce, opaque, 1);
1440 mem_free_if(realm);
1441 mem_free_if(nonce);
1442 mem_free_if(opaque);
1443 mem_free(d);
1444 break;
1446 #ifdef CONFIG_GSSAPI
1447 else if (!c_strncasecmp(d, HTTPNEG_GSS_STR, HTTPNEG_GSS_STRLEN)) {
1448 if (http_negotiate_input(conn, uri, HTTPNEG_GSS, str)==0)
1449 ret = 1;
1450 mem_free(d);
1451 break;
1453 else if (!c_strncasecmp(d, HTTPNEG_NEG_STR, HTTPNEG_NEG_STRLEN)) {
1454 if (http_negotiate_input(conn, uri, HTTPNEG_NEG, str)==0)
1455 ret = 1;
1456 mem_free(d);
1457 break;
1459 #endif
1460 mem_free(d);
1461 d = parse_header(str, header_field, &str);
1463 return ret;
1467 void
1468 http_got_header(struct socket *socket, struct read_buffer *rb)
1470 struct connection *conn = socket->conn;
1471 struct http_connection_info *http = conn->info;
1472 unsigned char *head;
1473 #ifdef CONFIG_COOKIES
1474 unsigned char *cookie, *ch;
1475 #endif
1476 unsigned char *d;
1477 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
1478 struct http_version version = { 0, 9 };
1479 struct connection_state state = (!is_in_state(conn->state, S_PROC)
1480 ? connection_state(S_GETH)
1481 : connection_state(S_PROC));
1482 int a, h = 200;
1483 int cf;
1485 if (socket->state == SOCKET_CLOSED) {
1486 if (!conn->tries && uri->host) {
1487 if (http->bl_flags & SERVER_BLACKLIST_NO_CHARSET) {
1488 del_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1489 } else {
1490 add_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1491 conn->tries = -1;
1494 retry_connection(conn, connection_state(S_CANT_READ));
1495 return;
1497 socket->state = SOCKET_RETRY_ONCLOSE;
1499 again:
1500 a = get_header(rb);
1501 if (a == -1) {
1502 abort_connection(conn, connection_state(S_HTTP_ERROR));
1503 return;
1505 if (!a) {
1506 read_from_socket(conn->socket, rb, state, http_got_header);
1507 return;
1509 /* a == -2 from get_header means HTTP/0.9. In that case, skip
1510 * the get_http_code call; @h and @version have already been
1511 * initialized with the right values. */
1512 if (a == -2) a = 0;
1513 if ((a && get_http_code(rb, &h, &version))
1514 || h == 101) {
1515 abort_connection(conn, connection_state(S_HTTP_ERROR));
1516 return;
1519 /* When no header, HTTP/0.9 document. That's always text/html,
1520 * according to
1521 * http://www.w3.org/Protocols/HTTP/AsImplemented.html. */
1522 /* FIXME: This usage of fake protocol headers for setting up the
1523 * content type has been obsoleted by the @content_type member of
1524 * {struct cache_entry}. */
1525 head = (a ? memacpy(rb->data, a)
1526 : stracpy("\r\nContent-Type: text/html\r\n"));
1527 if (!head) {
1528 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1529 return;
1532 if (check_http_server_bugs(uri, http, head)) {
1533 mem_free(head);
1534 retry_connection(conn, connection_state(S_RESTART));
1535 return;
1538 #ifdef CONFIG_CGI
1539 if (uri->protocol == PROTOCOL_FILE) {
1540 /* ``Status'' is not a standard HTTP header field although some
1541 * HTTP servers like www.php.net uses it for some reason. It should
1542 * only be used for CGI scripts so that it does not interfere
1543 * with status code depended handling for ``normal'' HTTP like
1544 * redirects. */
1545 d = parse_header(head, "Status", NULL);
1546 if (d) {
1547 int h2 = atoi(d);
1549 mem_free(d);
1550 if (h2 >= 100 && h2 < 600) h = h2;
1551 if (h == 101) {
1552 mem_free(head);
1553 abort_connection(conn, connection_state(S_HTTP_ERROR));
1554 return;
1558 #endif
1560 #ifdef CONFIG_COOKIES
1561 ch = head;
1562 while ((cookie = parse_header(ch, "Set-Cookie", &ch))) {
1563 set_cookie(uri, cookie);
1564 mem_free(cookie);
1566 #endif
1567 http->code = h;
1569 if (h == 100) {
1570 mem_free(head);
1571 state = connection_state(S_PROC);
1572 kill_buffer_data(rb, a);
1573 goto again;
1575 if (h < 200) {
1576 mem_free(head);
1577 abort_connection(conn, connection_state(S_HTTP_ERROR));
1578 return;
1580 if (h == 304) {
1581 mem_free(head);
1582 http_end_request(conn, connection_state(S_OK), 1);
1583 return;
1585 if (h == 204) {
1586 mem_free(head);
1587 http_end_request(conn, connection_state(S_HTTP_204), 0);
1588 return;
1590 if (h == 200 && connection_is_https_proxy(conn) && !conn->socket->ssl) {
1591 #ifdef CONFIG_SSL
1592 mem_free(head);
1593 socket->need_ssl = 1;
1594 complete_connect_socket(socket, uri, http_send_header);
1595 #else
1596 abort_connection(conn, connection_state(S_SSL_ERROR));
1597 #endif
1598 return;
1601 conn->cached = get_cache_entry(conn->uri);
1602 if (!conn->cached) {
1603 mem_free(head);
1604 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1605 return;
1607 conn->cached->cgi = conn->cgi;
1608 mem_free_set(&conn->cached->head, head);
1610 if (!get_opt_bool("document.cache.ignore_cache_control", NULL)) {
1611 struct cache_entry *cached = conn->cached;
1613 /* I am not entirely sure in what order we should process these
1614 * headers and if we should still process Cache-Control max-age
1615 * if we already set max age to date mentioned in Expires.
1616 * --jonas */
1617 /* Ensure that when ever cached->max_age is set, cached->expired
1618 * is also set, so the cache management knows max_age contains a
1619 * valid time. If on the other hand no caching is requested
1620 * cached->expire should be set to zero. */
1621 if ((d = parse_header(cached->head, "Expires", NULL))) {
1622 /* Convert date to seconds. */
1623 time_t expires = parse_date(&d, NULL, 0, 1);
1625 mem_free(d);
1627 if (expires && cached->cache_mode != CACHE_MODE_NEVER) {
1628 timeval_from_seconds(&cached->max_age, expires);
1629 cached->expire = 1;
1633 if ((d = parse_header(cached->head, "Pragma", NULL))) {
1634 if (strstr(d, "no-cache")) {
1635 cached->cache_mode = CACHE_MODE_NEVER;
1636 cached->expire = 0;
1638 mem_free(d);
1641 if (cached->cache_mode != CACHE_MODE_NEVER
1642 && (d = parse_header(cached->head, "Cache-Control", NULL))) {
1643 if (strstr(d, "no-cache") || strstr(d, "must-revalidate")) {
1644 cached->cache_mode = CACHE_MODE_NEVER;
1645 cached->expire = 0;
1647 } else {
1648 unsigned char *pos = strstr(d, "max-age=");
1650 assert(cached->cache_mode != CACHE_MODE_NEVER);
1652 if (pos) {
1653 /* Grab the number of seconds. */
1654 timeval_T max_age;
1656 timeval_from_seconds(&max_age, atol(pos + 8));
1657 timeval_now(&cached->max_age);
1658 timeval_add_interval(&cached->max_age, &max_age);
1660 cached->expire = 1;
1664 mem_free(d);
1668 /* XXX: Is there some reason why NOT to follow the Location header
1669 * for any status? If the server didn't mean it, it wouldn't send
1670 * it, after all...? --pasky */
1671 if (h == 201 || h == 301 || h == 302 || h == 303 || h == 307) {
1672 d = parse_header(conn->cached->head, "Location", NULL);
1673 if (d) {
1674 int use_get_method = (h == 303);
1676 /* A note from RFC 2616 section 10.3.3:
1677 * RFC 1945 and RFC 2068 specify that the client is not
1678 * allowed to change the method on the redirected
1679 * request. However, most existing user agent
1680 * implementations treat 302 as if it were a 303
1681 * response, performing a GET on the Location
1682 * field-value regardless of the original request
1683 * method. */
1684 /* So POST must not be redirected to GET, but some
1685 * BUGGY message boards rely on it :-( */
1686 if (h == 302
1687 && get_opt_bool("protocol.http.bugs.broken_302_redirect", NULL))
1688 use_get_method = 1;
1690 redirect_cache(conn->cached, d, use_get_method, -1);
1691 mem_free(d);
1695 if (h == 401) {
1696 if (check_http_authentication(conn, uri,
1697 conn->cached->head, "WWW-Authenticate")) {
1698 retry_connection(conn, connection_state(S_RESTART));
1699 return;
1703 if (h == 407) {
1704 unsigned char *str;
1705 int restart = 0;
1707 d = parse_header(conn->cached->head, "Proxy-Authenticate", &str);
1708 while (d) {
1709 if (!c_strncasecmp(d, "Basic", 5)) {
1710 unsigned char *realm = get_header_param(d, "realm");
1712 if (realm) {
1713 mem_free_set(&proxy_auth.realm, realm);
1714 proxy_auth.digest = 0;
1715 mem_free(d);
1716 break;
1719 } else if (!c_strncasecmp(d, "Digest", 6)) {
1720 unsigned char *realm = get_header_param(d, "realm");
1721 unsigned char *nonce = get_header_param(d, "nonce");
1722 unsigned char *opaque = get_header_param(d, "opaque");
1723 unsigned char *stale = get_header_param(d, "stale");
1725 if (stale) {
1726 if (strcasecmp(stale, "true")) restart = 1;
1727 else restart = 0;
1728 mem_free(stale);
1730 mem_free_set(&proxy_auth.realm, realm);
1731 mem_free_set(&proxy_auth.nonce, nonce);
1732 mem_free_set(&proxy_auth.opaque, opaque);
1733 if (proxy_auth.digest == 0) restart = 1;
1734 proxy_auth.digest = 1;
1736 mem_free(d);
1737 break;
1740 mem_free(d);
1741 d = parse_header(str, "Proxy-Authenticate", &str);
1743 if (restart) {
1744 retry_connection(conn, connection_state(S_RESTART));
1745 return;
1749 kill_buffer_data(rb, a);
1750 http->close = 0;
1751 http->length = -1;
1752 http->recv_version = version;
1754 if ((d = parse_header(conn->cached->head, "Connection", NULL))
1755 || (d = parse_header(conn->cached->head, "Proxy-Connection", NULL))) {
1756 if (!c_strcasecmp(d, "close")) http->close = 1;
1757 mem_free(d);
1758 } else if (PRE_HTTP_1_1(version)) {
1759 http->close = 1;
1762 cf = conn->from;
1763 conn->from = 0;
1764 d = parse_header(conn->cached->head, "Content-Range", NULL);
1765 if (d) {
1766 if (strlen(d) > 6) {
1767 d[5] = 0;
1768 if (isdigit(d[6]) && !c_strcasecmp(d, "bytes")) {
1769 int f;
1771 errno = 0;
1772 f = strtol(d + 6, NULL, 10);
1774 if (!errno && f >= 0) conn->from = f;
1777 mem_free(d);
1779 if (cf && !conn->from && !conn->unrestartable) conn->unrestartable = 1;
1780 if ((conn->progress->start <= 0 && conn->from > cf) || conn->from < 0) {
1781 /* We don't want this if conn->progress.start because then conn->from will
1782 * be probably value of conn->progress.start, while cf is 0. */
1783 abort_connection(conn, connection_state(S_HTTP_ERROR));
1784 return;
1787 #if 0
1789 struct status *s;
1790 foreach (s, conn->downloads) {
1791 fprintf(stderr, "conn %p status %p pri %d st %d er %d :: ce %s",
1792 conn, s, s->pri, s->state, s->prev_error,
1793 s->cached ? s->cached->url : (unsigned char *) "N-U-L-L");
1796 #endif
1798 if (conn->progress->start >= 0) {
1799 /* Update to the real value which we've got from Content-Range. */
1800 conn->progress->seek = conn->from;
1802 conn->progress->start = conn->from;
1804 d = parse_header(conn->cached->head, "Content-Length", NULL);
1805 if (d) {
1806 unsigned char *ep;
1807 long long l;
1809 errno = 0;
1810 l = strtoll(d, (char **) &ep, 10);
1812 if (!errno && !*ep && l >= 0) {
1813 if (!http->close || POST_HTTP_1_0(version))
1814 http->length = l;
1815 conn->est_length = conn->from + l;
1817 mem_free(d);
1820 if (!conn->unrestartable) {
1821 d = parse_header(conn->cached->head, "Accept-Ranges", NULL);
1823 if (d) {
1824 if (!c_strcasecmp(d, "none"))
1825 conn->unrestartable = 1;
1826 mem_free(d);
1827 } else {
1828 if (!conn->from)
1829 conn->unrestartable = 1;
1833 d = parse_header(conn->cached->head, "Transfer-Encoding", NULL);
1834 if (d) {
1835 if (!c_strcasecmp(d, "chunked")) {
1836 http->length = LEN_CHUNKED;
1837 http->chunk_remaining = CHUNK_SIZE;
1839 mem_free(d);
1841 if (!http->close && http->length == -1) http->close = 1;
1843 d = parse_header(conn->cached->head, "Last-Modified", NULL);
1844 if (d) {
1845 if (conn->cached->last_modified && c_strcasecmp(conn->cached->last_modified, d)) {
1846 delete_entry_content(conn->cached);
1847 if (conn->from) {
1848 conn->from = 0;
1849 mem_free(d);
1850 retry_connection(conn, connection_state(S_MODIFIED));
1851 return;
1854 if (!conn->cached->last_modified) conn->cached->last_modified = d;
1855 else mem_free(d);
1857 if (!conn->cached->last_modified) {
1858 d = parse_header(conn->cached->head, "Date", NULL);
1859 if (d) conn->cached->last_modified = d;
1862 /* FIXME: Parse only if HTTP/1.1 or later? --Zas */
1863 d = parse_header(conn->cached->head, "ETag", NULL);
1864 if (d) {
1865 if (conn->cached->etag) {
1866 unsigned char *old_tag = conn->cached->etag;
1867 unsigned char *new_tag = d;
1869 /* http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19 */
1871 if (new_tag[0] == 'W' && new_tag[1] == '/')
1872 new_tag += 2;
1874 if (old_tag[0] == 'W' && old_tag[1] == '/')
1875 old_tag += 2;
1877 if (strcmp(new_tag, old_tag)) {
1878 delete_entry_content(conn->cached);
1879 if (conn->from) {
1880 conn->from = 0;
1881 mem_free(d);
1882 retry_connection(conn, connection_state(S_MODIFIED));
1883 return;
1888 if (!conn->cached->etag)
1889 conn->cached->etag = d;
1890 else
1891 mem_free(d);
1894 d = parse_header(conn->cached->head, "Content-Encoding", NULL);
1895 if (d) {
1896 #if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA)
1897 unsigned char *extension = get_extension_from_uri(uri);
1898 enum stream_encoding file_encoding;
1900 file_encoding = extension ? guess_encoding(extension) : ENCODING_NONE;
1901 mem_free_if(extension);
1902 #endif
1903 /* If the content is encoded, we want to preserve the encoding
1904 * if it is implied by the extension, so that saving the URI
1905 * will leave the saved file with the correct encoding. */
1906 #ifdef CONFIG_GZIP
1907 if (file_encoding != ENCODING_GZIP
1908 && (!c_strcasecmp(d, "gzip") || !c_strcasecmp(d, "x-gzip")))
1909 conn->content_encoding = ENCODING_GZIP;
1910 if (!c_strcasecmp(d, "deflate") || !c_strcasecmp(d, "x-deflate"))
1911 conn->content_encoding = ENCODING_DEFLATE;
1912 #endif
1914 #ifdef CONFIG_BZIP2
1915 if (file_encoding != ENCODING_BZIP2
1916 && (!c_strcasecmp(d, "bzip2") || !c_strcasecmp(d, "x-bzip2")))
1917 conn->content_encoding = ENCODING_BZIP2;
1918 #endif
1920 #ifdef CONFIG_LZMA
1921 if (file_encoding != ENCODING_LZMA
1922 && (!c_strcasecmp(d, "lzma") || !c_strcasecmp(d, "x-lzma")))
1923 conn->content_encoding = ENCODING_LZMA;
1924 #endif
1925 mem_free(d);
1928 if (conn->content_encoding != ENCODING_NONE) {
1929 mem_free_if(conn->cached->encoding_info);
1930 conn->cached->encoding_info = stracpy(get_encoding_name(conn->content_encoding));
1933 if (http->length == -1 || http->close)
1934 socket->state = SOCKET_END_ONCLOSE;
1936 read_http_data(socket, rb);