modified: diffout.py
[GalaxyCodeBases.git] / c_cpp / lib / htslib / hfile_libcurl.c
blobea99aa741cc33e5cb60bd164f17a6a6374a65eab
/*  hfile_libcurl.c -- libcurl backend for low-level file streams.

    Copyright (C) 2015-2017 Genome Research Ltd.

    Author: John Marshall <jm18@sanger.ac.uk>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.  */
25 #include <config.h>
27 #include <stdarg.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <errno.h>
31 #include <sys/select.h>
33 #include "hfile_internal.h"
34 #ifdef ENABLE_PLUGINS
35 #include "version.h"
36 #endif
37 #include "htslib/hts.h" // for hts_version() and hts_verbose
38 #include "htslib/kstring.h"
40 #include <curl/curl.h>
42 typedef struct {
43 hFILE base;
44 CURL *easy;
45 struct curl_slist *headers;
46 off_t file_size;
47 struct {
48 union { char *rd; const char *wr; } ptr;
49 size_t len;
50 } buffer;
51 CURLcode final_result; // easy result code for finished transfers
52 // Flags for communicating with libcurl callbacks:
53 unsigned paused : 1; // callback tells us that it has paused transfer
54 unsigned closing : 1; // informs callback that hclose() has been invoked
55 unsigned finished : 1; // wait_perform() tells us transfer is complete
56 } hFILE_libcurl;
/* Translate an HTTP response status into the closest errno value.
   Specific 4xx/5xx codes get dedicated values; any other 5xx status is
   reported as EIO, any other 4xx status as EINVAL, and everything below
   400 yields 0 (not an error).  */
static int http_status_errno(int status)
{
    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        break;
    }

    // No dedicated mapping: fall back on the status class
    if (status >= 500) return EIO;
    if (status >= 400) return EINVAL;
    return 0;
}
81 static int easy_errno(CURL *easy, CURLcode err)
83 long lval;
85 switch (err) {
86 case CURLE_OK:
87 return 0;
89 case CURLE_UNSUPPORTED_PROTOCOL:
90 case CURLE_URL_MALFORMAT:
91 return EINVAL;
93 #if LIBCURL_VERSION_NUM >= 0x071505
94 case CURLE_NOT_BUILT_IN:
95 return ENOSYS;
96 #endif
98 case CURLE_COULDNT_RESOLVE_PROXY:
99 case CURLE_COULDNT_RESOLVE_HOST:
100 case CURLE_FTP_CANT_GET_HOST:
101 return EDESTADDRREQ; // Lookup failure
103 case CURLE_COULDNT_CONNECT:
104 case CURLE_SEND_ERROR:
105 case CURLE_RECV_ERROR:
106 if (curl_easy_getinfo(easy, CURLINFO_OS_ERRNO, &lval) == CURLE_OK)
107 return lval;
108 else
109 return ECONNABORTED;
111 case CURLE_REMOTE_ACCESS_DENIED:
112 case CURLE_LOGIN_DENIED:
113 case CURLE_TFTP_PERM:
114 return EACCES;
116 case CURLE_PARTIAL_FILE:
117 return EPIPE;
119 case CURLE_HTTP_RETURNED_ERROR:
120 if (curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &lval) == CURLE_OK)
121 return http_status_errno(lval);
122 else
123 return EIO;
125 case CURLE_OUT_OF_MEMORY:
126 return ENOMEM;
128 case CURLE_OPERATION_TIMEDOUT:
129 return ETIMEDOUT;
131 case CURLE_RANGE_ERROR:
132 return ESPIPE;
134 case CURLE_SSL_CONNECT_ERROR:
135 // TODO return SSL error buffer messages
136 return ECONNABORTED;
138 case CURLE_FILE_COULDNT_READ_FILE:
139 case CURLE_TFTP_NOTFOUND:
140 return ENOENT;
142 case CURLE_TOO_MANY_REDIRECTS:
143 return ELOOP;
145 case CURLE_FILESIZE_EXCEEDED:
146 return EFBIG;
148 case CURLE_REMOTE_DISK_FULL:
149 return ENOSPC;
151 case CURLE_REMOTE_FILE_EXISTS:
152 return EEXIST;
154 default:
155 return EIO;
159 static int multi_errno(CURLMcode errm)
161 switch (errm) {
162 case CURLM_CALL_MULTI_PERFORM:
163 case CURLM_OK:
164 return 0;
166 case CURLM_BAD_HANDLE:
167 case CURLM_BAD_EASY_HANDLE:
168 case CURLM_BAD_SOCKET:
169 return EBADF;
171 case CURLM_OUT_OF_MEMORY:
172 return ENOMEM;
174 default:
175 return EIO;
180 static struct {
181 CURLM *multi;
182 kstring_t useragent;
183 int nrunning;
184 unsigned perform_again : 1;
185 } curl = { NULL, { 0, 0, NULL }, 0, 0 };
187 static void libcurl_exit()
189 (void) curl_multi_cleanup(curl.multi);
190 curl.multi = NULL;
192 free(curl.useragent.s);
193 curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL;
195 curl_global_cleanup();
199 static void process_messages()
201 CURLMsg *msg;
202 int remaining;
204 while ((msg = curl_multi_info_read(curl.multi, &remaining)) != NULL) {
205 hFILE_libcurl *fp = NULL;
206 curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, (char **) &fp);
207 switch (msg->msg) {
208 case CURLMSG_DONE:
209 fp->finished = 1;
210 fp->final_result = msg->data.result;
211 break;
213 default:
214 break;
219 static int wait_perform()
221 fd_set rd, wr, ex;
222 int maxfd, nrunning;
223 long timeout;
224 CURLMcode errm;
226 FD_ZERO(&rd);
227 FD_ZERO(&wr);
228 FD_ZERO(&ex);
229 if (curl_multi_fdset(curl.multi, &rd, &wr, &ex, &maxfd) != CURLM_OK)
230 maxfd = -1, timeout = 1000;
231 else if (maxfd < 0)
232 timeout = 100; // as recommended by curl_multi_fdset(3)
233 else {
234 if (curl_multi_timeout(curl.multi, &timeout) != CURLM_OK)
235 timeout = 1000;
236 else if (timeout < 0)
237 timeout = 10000; // as recommended by curl_multi_timeout(3)
240 if (timeout > 0 && ! curl.perform_again) {
241 struct timeval tval;
242 tval.tv_sec = (timeout / 1000);
243 tval.tv_usec = (timeout % 1000) * 1000;
245 if (select(maxfd + 1, &rd, &wr, &ex, &tval) < 0) return -1;
248 errm = curl_multi_perform(curl.multi, &nrunning);
249 curl.perform_again = 0;
250 if (errm == CURLM_CALL_MULTI_PERFORM) curl.perform_again = 1;
251 else if (errm != CURLM_OK) { errno = multi_errno(errm); return -1; }
253 if (nrunning < curl.nrunning) process_messages();
254 return 0;
258 static size_t recv_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
260 hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
261 size_t n = size * nmemb;
263 if (n > fp->buffer.len) { fp->paused = 1; return CURL_WRITEFUNC_PAUSE; }
264 else if (n == 0) return 0;
266 memcpy(fp->buffer.ptr.rd, ptr, n);
267 fp->buffer.ptr.rd += n;
268 fp->buffer.len -= n;
269 return n;
272 static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes)
274 hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
275 char *buffer = (char *) bufferv;
276 CURLcode err;
278 fp->buffer.ptr.rd = buffer;
279 fp->buffer.len = nbytes;
280 fp->paused = 0;
281 err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
282 if (err != CURLE_OK) { errno = easy_errno(fp->easy, err); return -1; }
284 while (! fp->paused && ! fp->finished)
285 if (wait_perform() < 0) return -1;
287 nbytes = fp->buffer.ptr.rd - buffer;
288 fp->buffer.ptr.rd = NULL;
289 fp->buffer.len = 0;
291 if (fp->finished && fp->final_result != CURLE_OK) {
292 errno = easy_errno(fp->easy, fp->final_result);
293 return -1;
296 return nbytes;
299 static size_t send_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
301 hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
302 size_t n = size * nmemb;
304 if (fp->buffer.len == 0) {
305 // Send buffer is empty; normally pause, or signal EOF if we're closing
306 if (fp->closing) return 0;
307 else { fp->paused = 1; return CURL_READFUNC_PAUSE; }
310 if (n > fp->buffer.len) n = fp->buffer.len;
311 memcpy(ptr, fp->buffer.ptr.wr, n);
312 fp->buffer.ptr.wr += n;
313 fp->buffer.len -= n;
314 return n;
317 static ssize_t libcurl_write(hFILE *fpv, const void *bufferv, size_t nbytes)
319 hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
320 const char *buffer = (const char *) bufferv;
321 CURLcode err;
323 fp->buffer.ptr.wr = buffer;
324 fp->buffer.len = nbytes;
325 fp->paused = 0;
326 err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
327 if (err != CURLE_OK) { errno = easy_errno(fp->easy, err); return -1; }
329 while (! fp->paused && ! fp->finished)
330 if (wait_perform() < 0) return -1;
332 nbytes = fp->buffer.ptr.wr - buffer;
333 fp->buffer.ptr.wr = NULL;
334 fp->buffer.len = 0;
336 if (fp->finished && fp->final_result != CURLE_OK) {
337 errno = easy_errno(fp->easy, fp->final_result);
338 return -1;
341 return nbytes;
344 static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence)
346 hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
348 CURLcode err;
349 CURLMcode errm;
350 off_t origin, pos;
352 switch (whence) {
353 case SEEK_SET:
354 origin = 0;
355 break;
356 case SEEK_CUR:
357 errno = ENOSYS;
358 return -1;
359 case SEEK_END:
360 if (fp->file_size < 0) { errno = ESPIPE; return -1; }
361 origin = fp->file_size;
362 break;
363 default:
364 errno = EINVAL;
365 return -1;
368 // Check 0 <= origin+offset < fp->file_size carefully, avoiding overflow
369 if ((offset < 0)? origin + offset < 0
370 : (fp->file_size >= 0 && offset > fp->file_size - origin)) {
371 errno = EINVAL;
372 return -1;
375 pos = origin + offset;
377 errm = curl_multi_remove_handle(curl.multi, fp->easy);
378 if (errm != CURLM_OK) { errno = multi_errno(errm); return -1; }
379 curl.nrunning--;
381 // TODO If we seem to be doing random access, use CURLOPT_RANGE to do
382 // limited reads (e.g. about a BAM block!) so seeking can reuse the
383 // existing connection more often.
385 err = curl_easy_setopt(fp->easy, CURLOPT_RESUME_FROM_LARGE,(curl_off_t)pos);
386 if (err != CURLE_OK) { errno = easy_errno(fp->easy, err); return -1; }
388 fp->buffer.len = 0;
389 fp->paused = fp->finished = 0;
391 errm = curl_multi_add_handle(curl.multi, fp->easy);
392 if (errm != CURLM_OK) { errno = multi_errno(errm); return -1; }
393 curl.nrunning++;
395 err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
396 if (err != CURLE_OK) { errno = easy_errno(fp->easy, err); return -1; }
398 while (! fp->paused && ! fp->finished)
399 if (wait_perform() < 0) return -1;
401 if (fp->finished && fp->final_result != CURLE_OK) {
402 errno = easy_errno(fp->easy, fp->final_result);
403 return -1;
406 return pos;
409 static int libcurl_close(hFILE *fpv)
411 hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
412 CURLcode err;
413 CURLMcode errm;
414 int save_errno = 0;
416 // Before closing the file, unpause it and perform on it so that uploads
417 // have the opportunity to signal EOF to the server -- see send_callback().
419 fp->buffer.len = 0;
420 fp->closing = 1;
421 fp->paused = 0;
422 err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
423 if (err != CURLE_OK) save_errno = easy_errno(fp->easy, err);
425 while (save_errno == 0 && ! fp->paused && ! fp->finished)
426 if (wait_perform() < 0) save_errno = errno;
428 if (fp->finished && fp->final_result != CURLE_OK)
429 save_errno = easy_errno(fp->easy, fp->final_result);
431 errm = curl_multi_remove_handle(curl.multi, fp->easy);
432 if (errm != CURLM_OK && save_errno == 0) save_errno = multi_errno(errm);
433 curl.nrunning--;
435 curl_easy_cleanup(fp->easy);
437 if (save_errno) { errno = save_errno; return -1; }
438 else return 0;
441 static const struct hFILE_backend libcurl_backend =
443 libcurl_read, libcurl_write, libcurl_seek, NULL, libcurl_close
446 static hFILE *
447 libcurl_open(const char *url, const char *modes, struct curl_slist *headers)
449 hFILE_libcurl *fp;
450 char mode;
451 const char *s;
452 CURLcode err;
453 CURLMcode errm;
454 int save;
456 if ((s = strpbrk(modes, "rwa+")) != NULL) {
457 mode = *s;
458 if (strpbrk(&s[1], "rwa+")) mode = 'e';
460 else mode = '\0';
462 if (mode != 'r' && mode != 'w') { errno = EINVAL; goto early_error; }
464 fp = (hFILE_libcurl *) hfile_init(sizeof (hFILE_libcurl), modes, 0);
465 if (fp == NULL) goto early_error;
467 fp->headers = headers;
468 fp->file_size = -1;
469 fp->buffer.ptr.rd = NULL;
470 fp->buffer.len = 0;
471 fp->final_result = (CURLcode) -1;
472 fp->paused = fp->closing = fp->finished = 0;
474 fp->easy = curl_easy_init();
475 if (fp->easy == NULL) { errno = ENOMEM; goto error; }
477 // Make a route to the hFILE_libcurl* given just a CURL* easy handle
478 err = curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
480 if (mode == 'r') {
481 err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEFUNCTION, recv_callback);
482 err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
484 else {
485 struct curl_slist *list;
487 err |= curl_easy_setopt(fp->easy, CURLOPT_READFUNCTION, send_callback);
488 err |= curl_easy_setopt(fp->easy, CURLOPT_READDATA, fp);
489 err |= curl_easy_setopt(fp->easy, CURLOPT_UPLOAD, 1L);
491 list = curl_slist_append(fp->headers, "Transfer-Encoding: chunked");
492 if (list) fp->headers = list; else goto error;
495 err |= curl_easy_setopt(fp->easy, CURLOPT_URL, url);
496 err |= curl_easy_setopt(fp->easy, CURLOPT_USERAGENT, curl.useragent.s);
497 if (fp->headers)
498 err |= curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, fp->headers);
499 err |= curl_easy_setopt(fp->easy, CURLOPT_FOLLOWLOCATION, 1L);
500 if (hts_verbose <= 8)
501 err |= curl_easy_setopt(fp->easy, CURLOPT_FAILONERROR, 1L);
502 if (hts_verbose >= 8)
503 err |= curl_easy_setopt(fp->easy, CURLOPT_VERBOSE, 1L);
505 if (err != 0) { errno = ENOSYS; goto error; }
507 errm = curl_multi_add_handle(curl.multi, fp->easy);
508 if (errm != CURLM_OK) { errno = multi_errno(errm); goto error; }
509 curl.nrunning++;
511 while (! fp->paused && ! fp->finished)
512 if (wait_perform() < 0) goto error_remove;
514 if (fp->finished && fp->final_result != CURLE_OK) {
515 errno = easy_errno(fp->easy, fp->final_result);
516 goto error_remove;
519 if (mode == 'r') {
520 double dval;
521 if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD,
522 &dval) == CURLE_OK && dval >= 0.0)
523 fp->file_size = (off_t) (dval + 0.1);
526 fp->base.backend = &libcurl_backend;
527 return &fp->base;
529 error_remove:
530 save = errno;
531 (void) curl_multi_remove_handle(curl.multi, fp->easy);
532 curl.nrunning--;
533 errno = save;
535 error:
536 save = errno;
537 if (fp->easy) curl_easy_cleanup(fp->easy);
538 if (fp->headers) curl_slist_free_all(fp->headers);
539 hfile_destroy((hFILE *) fp);
540 errno = save;
541 return NULL;
543 early_error:
544 save = errno;
545 if (headers) curl_slist_free_all(headers);
546 errno = save;
547 return NULL;
550 static hFILE *hopen_libcurl(const char *url, const char *modes)
552 return libcurl_open(url, modes, NULL);
/* Append one header line to *headers.  Returns 0 on success, or -1 with
   errno set to ENOMEM if the list could not be extended (in which case
   *headers is left unchanged).  */
static int append_http_header(struct curl_slist **headers, const char *hdr)
{
    struct curl_slist *list = curl_slist_append(*headers, hdr);
    if (list == NULL) { errno = ENOMEM; return -1; }
    *headers = list;
    return 0;
}

/* Collect HTTP request headers from a NULL-terminated sequence of
   (type, value...) arguments, appending them to *headers.

   Recognised argument types:
     "httphdr:v"  followed by a NULL-terminated array of header strings
                  (const char **); a NULL array is treated as empty
     "httphdr:l"  followed by header strings up to a terminating NULL
     "httphdr"    followed by a single header string, ignored if NULL
     "va_list"    followed by a va_list * that is parsed recursively,
                  ignored if NULL

   Returns 0 on success, or -1 with errno set: EINVAL for an unrecognised
   argument type, ENOMEM if a header could not be appended.  Headers added
   before a failure remain in *headers for the caller to free.  */
static int parse_va_list(struct curl_slist **headers, va_list args)
{
    const char *argtype;

    while ((argtype = va_arg(args, const char *)) != NULL) {
        if (strcmp(argtype, "httphdr:v") == 0) {
            const char **hdr = va_arg(args, const char **);
            if (hdr == NULL) continue;
            for (; *hdr; hdr++)
                if (append_http_header(headers, *hdr) < 0) return -1;
        }
        else if (strcmp(argtype, "httphdr:l") == 0) {
            const char *hdr;
            while ((hdr = va_arg(args, const char *)) != NULL)
                if (append_http_header(headers, hdr) < 0) return -1;
        }
        else if (strcmp(argtype, "httphdr") == 0) {
            const char *hdr = va_arg(args, const char *);
            if (hdr && append_http_header(headers, hdr) < 0) return -1;
        }
        else if (strcmp(argtype, "va_list") == 0) {
            va_list *args2 = va_arg(args, va_list *);
            if (args2 && parse_va_list(headers, *args2) < 0) return -1;
        }
        else { errno = EINVAL; return -1; }
    }

    return 0;
}
592 static hFILE *vhopen_libcurl(const char *url, const char *modes, va_list args)
594 struct curl_slist *headers = NULL;
595 if (parse_va_list(&headers, args) < 0) {
596 if (headers) curl_slist_free_all(headers);
597 return NULL;
600 return libcurl_open(url, modes, headers);
603 int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self)
605 static const struct hFILE_scheme_handler handler =
606 { hopen_libcurl, hfile_always_remote, "libcurl",
607 2000 + 50,
608 vhopen_libcurl };
610 #ifdef ENABLE_PLUGINS
611 // Embed version string for examination via strings(1) or what(1)
612 static const char id[] = "@(#)hfile_libcurl plugin (htslib)\t" HTS_VERSION;
613 const char *version = strchr(id, '\t')+1;
614 #else
615 const char *version = hts_version();
616 #endif
617 const curl_version_info_data *info;
618 const char * const *protocol;
619 CURLcode err;
621 err = curl_global_init(CURL_GLOBAL_ALL);
622 if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; }
624 curl.multi = curl_multi_init();
625 if (curl.multi == NULL) { curl_global_cleanup(); errno = EIO; return -1; }
627 info = curl_version_info(CURLVERSION_NOW);
628 ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version);
630 curl.nrunning = 0;
631 curl.perform_again = 0;
632 self->name = "libcurl";
633 self->destroy = libcurl_exit;
635 for (protocol = info->protocols; *protocol; protocol++)
636 hfile_add_scheme_handler(*protocol, &handler);
637 return 0;