new
[libcurl.git] / lib / download.c
blobe262c42aa3fdc89537bde6aeb94902bdb97d4997
1 /*****************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
8 * The contents of this file are subject to the Mozilla Public License
9 * Version 1.0 (the "License"); you may not use this file except in
10 * compliance with the License. You may obtain a copy of the License at
11 * http://www.mozilla.org/MPL/
13 * Software distributed under the License is distributed on an "AS IS"
14 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
15 * License for the specific language governing rights and limitations
16 * under the License.
18 * The Original Code is Curl.
20 * The Initial Developer of the Original Code is Daniel Stenberg.
22 * Portions created by the Initial Developer are Copyright (C) 1998.
23 * All Rights Reserved.
25 * ------------------------------------------------------------
26 * Main author:
27 * - Daniel Stenberg <Daniel.Stenberg@haxx.nu>
29 * http://curl.haxx.nu
31 * $Source: /cvsroot/curl/curl/lib/Attic/download.c,v $
32 * $Revision: 1.1.1.1 $
33 * $Date: 1999-12-29 14:21:20 $
34 * $Author: bagder $
35 * $State: Exp $
36 * $Locker: $
38 * ------------------------------------------------------------
39 ****************************************************************************/
41 #include <stdlib.h>
42 #include <stdio.h>
43 #include <string.h>
45 #include "setup.h"
47 #ifdef HAVE_UNISTD_H
48 #include <unistd.h>
49 #endif
50 #ifdef HAVE_SYS_SELECT_H
51 #include <sys/select.h>
52 #endif
54 #include "urldata.h"
55 #include <curl/curl.h>
57 #ifdef __BEOS__
58 #include <net/socket.h>
59 #endif
61 #ifdef WIN32
62 #if !defined( __GNUC__) || defined(__MINGW32__)
63 #include <winsock.h>
64 #endif
65 #include <time.h> /* for the time_t typedef! */
67 #if defined(__GNUC__) && defined(TIME_WITH_SYS_TIME)
68 #include <sys/time.h>
69 #endif
71 #endif
73 #include "progress.h"
74 #include "speedcheck.h"
75 #include "sendf.h"
77 #ifdef USE_ZLIB
78 #include <zlib.h>
79 #endif
81 #define MAX(x,y) ((x)>(y)?(x):(y))
83 /* --- download a stream from a socket --- */
85 /* This newly edited version of Download() was brought to us by the friendly
86 Mark Butler <butlerm@xmission.com>. Re-indented with the indent command. */
88 UrgError
89 Download (struct UrlData *data,
90 int sockfd, /* socket to read from */
91 int size, /* -1 if unknown at this point */
92 bool getheader, /* TRUE if header parsing is wanted */
93 long *bytecountp /* return number of bytes read */
96 char *buf = data->buffer;
97 size_t nread;
98 int bytecount = 0;
99 long contentlength=0;
100 struct timeval start = tvnow();
101 struct timeval now = start;
102 bool header = TRUE;
103 int headerline = 0; /* counts header lines to better track the first one */
105 char *hbufp; /* points at *end* of header line */
106 int hbuflen = 0;
107 char *str; /* within buf */
108 char *str_start; /* within buf */
109 char *end_ptr; /* within buf */
110 char *p; /* within headerbuff */
111 bool content_range = FALSE; /* set TRUE if Content-Range: was found */
112 int offset = 0; /* possible resume offset read from the
113 Content-Range: header */
114 int code = 0; /* error code from the 'HTTP/1.? XXX' line */
115 #ifdef USE_ZLIB
116 gzFile gzfile=NULL;
117 #endif
119 /* for the low speed checks: */
120 UrgError urg;
121 time_t timeofdoc=0;
122 long bodywrites=0;
124 char newurl[URL_MAX_LENGTH]; /* buffer for Location: URL */
126 hbufp = data->headerbuff;
128 myalarm (0); /* switch off the alarm-style timeout */
130 now = tvnow();
131 start = now;
133 if (!getheader) {
134 header = FALSE;
135 ProgressInit (data, size);
138 fd_set readfd;
139 fd_set keepfd;
140 struct timeval interval;
141 bool keepon = TRUE;
143 /* timeout every X second
144 - makes a better progressmeter (i.e even when no data is read, the
145 meter can be updated and reflect reality)
146 - allows removal of the alarm() crap
147 - variable timeout is easier
150 FD_ZERO (&readfd); /* clear it */
151 FD_SET (sockfd, &readfd);
153 keepfd = readfd;
154 #ifdef USE_ZLIB
155 gzfile = gzdopen(sockfd, "rb");
156 #endif
157 while (keepon) {
158 readfd = keepfd; /* set this every lap in the loop */
159 interval.tv_sec = 2;
160 interval.tv_usec = 0;
162 switch (select (sockfd + 1, &readfd, NULL, NULL, &interval)) {
163 case -1: /* error, stop reading */
164 keepon = FALSE;
165 continue;
166 case 0: /* timeout */
167 break;
168 default: /* read! */
169 #ifdef USE_SSLEAY
170 if (data->use_ssl) {
171 nread = SSL_read (data->ssl, buf, BUFSIZE - 1);
173 else {
174 #endif
175 #ifdef USE_ZLIB
176 nread = gzread(gzfile, buf, BUFSIZE -1 );
177 #else
178 nread = sread (sockfd, buf, BUFSIZE - 1);
179 #endif
180 #ifdef USE_SSLEAY
182 #endif /* USE_SSLEAY */
184 /* NULL terminate, allowing string ops to be used */
185 if (0 < (signed int) nread)
186 buf[nread] = 0;
188 /* if we receive 0 or less here, the server closed the connection and
189 we bail out from this! */
190 else if (0 >= (signed int) nread) {
191 keepon = FALSE;
192 break;
195 str = buf; /* Default buffer to use when we write the
196 buffer, it may be changed in the flow below
197 before the actual storing is done. */
199 /* Since this is a two-state thing, we check if we are parsing
200 headers at the moment or not. */
202 if (header) {
203 /* we are in parse-the-header-mode */
205 /* header line within buffer loop */
206 do {
207 int hbufp_index;
209 str_start = str; /* str_start is start of line within buf */
211 end_ptr = strchr (str_start, '\n');
213 if (!end_ptr) {
214 /* no more complete header lines within buffer */
215 /* copy what is remaining into headerbuff */
216 int str_length = (int)strlen(str);
218 if (hbuflen + (int)str_length >= data->headersize) {
219 char *newbuff;
220 long newsize=MAX((hbuflen+str_length)*3/2,
221 data->headersize*2);
222 hbufp_index = hbufp - data->headerbuff;
223 newbuff = (char *)realloc(data->headerbuff, newsize);
224 if(!newbuff) {
225 failf (data, "Failed to alloc memory for big header!");
226 return URG_READ_ERROR;
228 data->headersize=newsize;
229 data->headerbuff = newbuff;
230 hbufp = data->headerbuff + hbufp_index;
232 strcpy (hbufp, str);
233 hbufp += strlen (str);
234 hbuflen += strlen (str);
235 break; /* read more and try again */
238 str = end_ptr + 1; /* move just past new line */
240 if (hbuflen + (str - str_start) >= data->headersize) {
241 char *newbuff;
242 long newsize=MAX((hbuflen+(str-str_start))*3/2,
243 data->headersize*2);
244 hbufp_index = hbufp - data->headerbuff;
245 newbuff = (char *)realloc(data->headerbuff, newsize);
246 if(!newbuff) {
247 failf (data, "Failed to alloc memory for big header!");
248 return URG_READ_ERROR;
250 data->headersize= newsize;
251 data->headerbuff = newbuff;
252 hbufp = data->headerbuff + hbufp_index;
255 /* copy to end of line */
256 strncpy (hbufp, str_start, str - str_start);
257 hbufp += str - str_start;
258 hbuflen += str - str_start;
259 *hbufp = 0;
261 p = data->headerbuff;
263 /* we now have a full line that p points to */
264 if (('\n' == *p) || ('\r' == *p)) {
265 /* Zero-length line means end of header! */
266 if (-1 != size) /* if known */
267 size += bytecount; /* we append the already read size */
270 if ('\r' == *p)
271 p++; /* pass the \r byte */
272 if ('\n' == *p)
273 p++; /* pass the \n byte */
275 ProgressInit (data, size); /* init progress meter */
276 header = FALSE; /* no more header to parse! */
278 /* now, only output this if the header AND body are requested: */
279 if ((data->conf & (CONF_HEADER | CONF_NOBODY)) == CONF_HEADER) {
280 if((p - data->headerbuff) !=
281 data->fwrite (data->headerbuff, 1,
282 p - data->headerbuff, data->out)) {
283 failf (data, "Failed writing output");
284 return URG_WRITE_ERROR;
287 if(data->writeheader) {
288 /* obviously, the header is requested to be written to
289 this file: */
290 if((p - data->headerbuff) !=
291 fwrite (data->headerbuff, 1, p - data->headerbuff,
292 data->writeheader)) {
293 failf (data, "Failed writing output");
294 return URG_WRITE_ERROR;
297 break; /* exit header line loop */
300 if (!headerline++) {
301 /* This is the first header, it MUST be the error code line
302 or else we consiser this to be the body right away! */
303 if (sscanf (p, " HTTP/1.%*c %3d", &code)) {
304 /* 404 -> URL not found! */
305 if (
306 ( ((data->conf & CONF_FOLLOWLOCATION) && (code >= 400)) ||
307 !(data->conf & CONF_FOLLOWLOCATION) && (code >= 300))
308 && (data->conf & CONF_FAILONERROR)) {
309 /* If we have been told to fail hard on HTTP-errors,
310 here is the check for that: */
311 /* serious error, go home! */
312 failf (data, "The requested file was not found");
313 return URG_HTTP_NOT_FOUND;
316 else {
317 header = FALSE; /* this is not a header line */
318 break;
321 /* check for Content-Length: header lines to get size */
322 if (strnequal("Content-Length", p, 14) &&
323 sscanf (p+14, ": %ld", &contentlength))
324 size = contentlength;
325 else if (strnequal("Content-Range", p, 13) &&
326 sscanf (p+13, ": bytes %d-", &offset)) {
327 if (data->resume_from == offset) {
328 /* we asked for a resume and we got it */
329 content_range = TRUE;
332 else if(data->cookies &&
333 strnequal("Set-Cookie: ", p, 11)) {
334 cookie_add(data->cookies, TRUE, &p[12]);
336 else if(strnequal("Last-Modified:", p, strlen("Last-Modified:")) &&
337 data->timecondition) {
338 time_t secs=time(NULL);
339 timeofdoc = get_date(p+strlen("Last-Modified:"), &secs);
341 else if ((code >= 300 && code < 400) &&
342 (data->conf & CONF_FOLLOWLOCATION) &&
343 strnequal("Location", p, 8) &&
344 sscanf (p+8, ": %" URL_MAX_LENGTH_TXT "s", newurl)) {
345 /* this is the URL that the server advices us to get
346 instead */
347 data->newurl = strdup (newurl);
350 if (data->conf & CONF_HEADER) {
351 if(hbuflen != data->fwrite (p, 1, hbuflen, data->out)) {
352 failf (data, "Failed writing output");
353 return URG_WRITE_ERROR;
356 if(data->writeheader) {
357 /* the header is requested to be written to this file */
358 if(hbuflen != fwrite (p, 1, hbuflen, data->writeheader)) {
359 failf (data, "Failed writing output");
360 return URG_WRITE_ERROR;
364 /* reset hbufp pointer && hbuflen */
365 hbufp = data->headerbuff;
366 hbuflen = 0;
368 while (*str); /* header line within buffer */
370 /* We might have reached the end of the header part here, but
371 there might be a non-header part left in the end of the read
372 buffer. */
374 if (!header) {
375 /* the next token and forward is not part of
376 the header! */
378 /* we subtract the remaining header size from the buffer */
379 nread -= (str - buf);
382 } /* end if header mode */
384 /* This is not an 'else if' since it may be a rest from the header
385 parsing, where the beginning of the buffer is headers and the end
386 is non-headers. */
387 if (str && !header && (nread > 0)) {
389 if(0 == bodywrites) {
390 /* These checks are only made the first time we are about to
391 write a chunk of the body */
392 if(data->conf&CONF_HTTP) {
393 /* HTTP-only checks */
394 if (data->resume_from && !content_range ) {
395 /* we wanted to resume a download, although the server doesn't
396 seem to support this */
397 failf (data, "HTTP server doesn't seem to support byte ranges. Cannot resume.");
398 return URG_HTTP_RANGE_ERROR;
400 else if (data->newurl) {
401 /* abort after the headers if "follow Location" is set */
402 infof (data, "Follow to new URL: %s\n", data->newurl);
403 return URG_OK;
405 else if(data->timecondition && !data->range) {
406 /* A time condition has been set AND no ranges have been
407 requested. This seems to be what chapter 13.3.4 of RFC 2616
408 defines to be the correct action for a HTTP/1.1 client */
409 if((timeofdoc > 0) && (data->timevalue > 0)) {
410 switch(data->timecondition) {
411 case TIMECOND_IFMODSINCE:
412 default:
413 if(timeofdoc < data->timevalue) {
414 infof(data, "The requested document is not new enough");
415 return URG_OK;
417 break;
418 case TIMECOND_IFUNMODSINCE:
419 if(timeofdoc > data->timevalue) {
420 infof(data, "The requested document is not old enough");
421 return URG_OK;
423 break;
424 } /* switch */
425 } /* two valid time strings */
426 } /* we have a time condition */
427 } /* this is HTTP */
428 } /* this is the first time we write a body part */
429 bodywrites++;
431 if(data->maxdownload &&
432 (bytecount + nread > data->maxdownload)) {
433 nread = data->maxdownload - bytecount;
434 if(nread < 0 ) /* this should be unusual */
435 nread = 0;
436 keepon = FALSE; /* we're done now! */
439 bytecount += nread;
441 if (nread != data->fwrite (str, 1, nread, data->out)) {
442 failf (data, "Failed writing output");
443 return URG_WRITE_ERROR;
447 break;
449 now = tvnow();
450 if (!header) {
451 ProgressShow (data, bytecount, start, now, FALSE);
453 urg = speedcheck (data, now);
454 if (urg)
455 return urg;
457 if (data->timeout && (tvdiff (now, start) > data->timeout)) {
458 failf (data, "Operation timed out with %d out of %d bytes received",
459 bytecount, size);
460 return URG_OPERATION_TIMEOUTED;
462 #ifdef MULTIDOC
463 if(contentlength && bytecount >= contentlength) {
464 /* we're done with this download, now stop it */
465 break;
467 #endif
470 if(contentlength && (bytecount != contentlength)) {
471 failf(data, "transfer closed with %d bytes remaining", contentlength-bytecount);
472 return URG_PARTIAL_FILE;
474 ProgressShow (data, bytecount, start, now, TRUE);
476 *bytecountp = bytecount;
478 #ifdef USE_ZLIB
479 gzclose(gzfile);
480 #endif
481 return URG_OK;