1 /* hfile_libcurl.c -- libcurl backend for low-level file streams.
3 Copyright (C) 2015-2017 Genome Research Ltd.
5 Author: John Marshall <jm18@sanger.ac.uk>
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
31 #include <sys/select.h>
33 #include "hfile_internal.h"
37 #include "htslib/hts.h" // for hts_version() and hts_verbose
38 #include "htslib/kstring.h"
40 #include <curl/curl.h>
45 struct curl_slist
*headers
;
48 union { char *rd
; const char *wr
; } ptr
;
51 CURLcode final_result
; // easy result code for finished transfers
52 // Flags for communicating with libcurl callbacks:
53 unsigned paused
: 1; // callback tells us that it has paused transfer
54 unsigned closing
: 1; // informs callback that hclose() has been invoked
55 unsigned finished
: 1; // wait_perform() tells us transfer is complete
58 static int http_status_errno(int status
)
62 case 501: return ENOSYS
;
63 case 503: return EBUSY
;
64 case 504: return ETIMEDOUT
;
67 else if (status
>= 400)
69 case 401: return EPERM
;
70 case 403: return EACCES
;
71 case 404: return ENOENT
;
72 case 405: return EROFS
;
73 case 407: return EPERM
;
74 case 408: return ETIMEDOUT
;
75 case 410: return ENOENT
;
76 default: return EINVAL
;
81 static int easy_errno(CURL
*easy
, CURLcode err
)
89 case CURLE_UNSUPPORTED_PROTOCOL
:
90 case CURLE_URL_MALFORMAT
:
93 #if LIBCURL_VERSION_NUM >= 0x071505
94 case CURLE_NOT_BUILT_IN
:
98 case CURLE_COULDNT_RESOLVE_PROXY
:
99 case CURLE_COULDNT_RESOLVE_HOST
:
100 case CURLE_FTP_CANT_GET_HOST
:
101 return EDESTADDRREQ
; // Lookup failure
103 case CURLE_COULDNT_CONNECT
:
104 case CURLE_SEND_ERROR
:
105 case CURLE_RECV_ERROR
:
106 if (curl_easy_getinfo(easy
, CURLINFO_OS_ERRNO
, &lval
) == CURLE_OK
)
111 case CURLE_REMOTE_ACCESS_DENIED
:
112 case CURLE_LOGIN_DENIED
:
113 case CURLE_TFTP_PERM
:
116 case CURLE_PARTIAL_FILE
:
119 case CURLE_HTTP_RETURNED_ERROR
:
120 if (curl_easy_getinfo(easy
, CURLINFO_RESPONSE_CODE
, &lval
) == CURLE_OK
)
121 return http_status_errno(lval
);
125 case CURLE_OUT_OF_MEMORY
:
128 case CURLE_OPERATION_TIMEDOUT
:
131 case CURLE_RANGE_ERROR
:
134 case CURLE_SSL_CONNECT_ERROR
:
135 // TODO return SSL error buffer messages
138 case CURLE_FILE_COULDNT_READ_FILE
:
139 case CURLE_TFTP_NOTFOUND
:
142 case CURLE_TOO_MANY_REDIRECTS
:
145 case CURLE_FILESIZE_EXCEEDED
:
148 case CURLE_REMOTE_DISK_FULL
:
151 case CURLE_REMOTE_FILE_EXISTS
:
159 static int multi_errno(CURLMcode errm
)
162 case CURLM_CALL_MULTI_PERFORM
:
166 case CURLM_BAD_HANDLE
:
167 case CURLM_BAD_EASY_HANDLE
:
168 case CURLM_BAD_SOCKET
:
171 case CURLM_OUT_OF_MEMORY
:
184 unsigned perform_again
: 1;
185 } curl
= { NULL
, { 0, 0, NULL
}, 0, 0 };
187 static void libcurl_exit()
189 (void) curl_multi_cleanup(curl
.multi
);
192 free(curl
.useragent
.s
);
193 curl
.useragent
.l
= curl
.useragent
.m
= 0; curl
.useragent
.s
= NULL
;
195 curl_global_cleanup();
199 static void process_messages()
204 while ((msg
= curl_multi_info_read(curl
.multi
, &remaining
)) != NULL
) {
205 hFILE_libcurl
*fp
= NULL
;
206 curl_easy_getinfo(msg
->easy_handle
, CURLINFO_PRIVATE
, (char **) &fp
);
210 fp
->final_result
= msg
->data
.result
;
219 static int wait_perform()
229 if (curl_multi_fdset(curl
.multi
, &rd
, &wr
, &ex
, &maxfd
) != CURLM_OK
)
230 maxfd
= -1, timeout
= 1000;
232 timeout
= 100; // as recommended by curl_multi_fdset(3)
234 if (curl_multi_timeout(curl
.multi
, &timeout
) != CURLM_OK
)
236 else if (timeout
< 0)
237 timeout
= 10000; // as recommended by curl_multi_timeout(3)
240 if (timeout
> 0 && ! curl
.perform_again
) {
242 tval
.tv_sec
= (timeout
/ 1000);
243 tval
.tv_usec
= (timeout
% 1000) * 1000;
245 if (select(maxfd
+ 1, &rd
, &wr
, &ex
, &tval
) < 0) return -1;
248 errm
= curl_multi_perform(curl
.multi
, &nrunning
);
249 curl
.perform_again
= 0;
250 if (errm
== CURLM_CALL_MULTI_PERFORM
) curl
.perform_again
= 1;
251 else if (errm
!= CURLM_OK
) { errno
= multi_errno(errm
); return -1; }
253 if (nrunning
< curl
.nrunning
) process_messages();
258 static size_t recv_callback(char *ptr
, size_t size
, size_t nmemb
, void *fpv
)
260 hFILE_libcurl
*fp
= (hFILE_libcurl
*) fpv
;
261 size_t n
= size
* nmemb
;
263 if (n
> fp
->buffer
.len
) { fp
->paused
= 1; return CURL_WRITEFUNC_PAUSE
; }
264 else if (n
== 0) return 0;
266 memcpy(fp
->buffer
.ptr
.rd
, ptr
, n
);
267 fp
->buffer
.ptr
.rd
+= n
;
272 static ssize_t
libcurl_read(hFILE
*fpv
, void *bufferv
, size_t nbytes
)
274 hFILE_libcurl
*fp
= (hFILE_libcurl
*) fpv
;
275 char *buffer
= (char *) bufferv
;
278 fp
->buffer
.ptr
.rd
= buffer
;
279 fp
->buffer
.len
= nbytes
;
281 err
= curl_easy_pause(fp
->easy
, CURLPAUSE_CONT
);
282 if (err
!= CURLE_OK
) { errno
= easy_errno(fp
->easy
, err
); return -1; }
284 while (! fp
->paused
&& ! fp
->finished
)
285 if (wait_perform() < 0) return -1;
287 nbytes
= fp
->buffer
.ptr
.rd
- buffer
;
288 fp
->buffer
.ptr
.rd
= NULL
;
291 if (fp
->finished
&& fp
->final_result
!= CURLE_OK
) {
292 errno
= easy_errno(fp
->easy
, fp
->final_result
);
299 static size_t send_callback(char *ptr
, size_t size
, size_t nmemb
, void *fpv
)
301 hFILE_libcurl
*fp
= (hFILE_libcurl
*) fpv
;
302 size_t n
= size
* nmemb
;
304 if (fp
->buffer
.len
== 0) {
305 // Send buffer is empty; normally pause, or signal EOF if we're closing
306 if (fp
->closing
) return 0;
307 else { fp
->paused
= 1; return CURL_READFUNC_PAUSE
; }
310 if (n
> fp
->buffer
.len
) n
= fp
->buffer
.len
;
311 memcpy(ptr
, fp
->buffer
.ptr
.wr
, n
);
312 fp
->buffer
.ptr
.wr
+= n
;
317 static ssize_t
libcurl_write(hFILE
*fpv
, const void *bufferv
, size_t nbytes
)
319 hFILE_libcurl
*fp
= (hFILE_libcurl
*) fpv
;
320 const char *buffer
= (const char *) bufferv
;
323 fp
->buffer
.ptr
.wr
= buffer
;
324 fp
->buffer
.len
= nbytes
;
326 err
= curl_easy_pause(fp
->easy
, CURLPAUSE_CONT
);
327 if (err
!= CURLE_OK
) { errno
= easy_errno(fp
->easy
, err
); return -1; }
329 while (! fp
->paused
&& ! fp
->finished
)
330 if (wait_perform() < 0) return -1;
332 nbytes
= fp
->buffer
.ptr
.wr
- buffer
;
333 fp
->buffer
.ptr
.wr
= NULL
;
336 if (fp
->finished
&& fp
->final_result
!= CURLE_OK
) {
337 errno
= easy_errno(fp
->easy
, fp
->final_result
);
344 static off_t
libcurl_seek(hFILE
*fpv
, off_t offset
, int whence
)
346 hFILE_libcurl
*fp
= (hFILE_libcurl
*) fpv
;
360 if (fp
->file_size
< 0) { errno
= ESPIPE
; return -1; }
361 origin
= fp
->file_size
;
368 // Check 0 <= origin+offset < fp->file_size carefully, avoiding overflow
369 if ((offset
< 0)? origin
+ offset
< 0
370 : (fp
->file_size
>= 0 && offset
> fp
->file_size
- origin
)) {
375 pos
= origin
+ offset
;
377 errm
= curl_multi_remove_handle(curl
.multi
, fp
->easy
);
378 if (errm
!= CURLM_OK
) { errno
= multi_errno(errm
); return -1; }
381 // TODO If we seem to be doing random access, use CURLOPT_RANGE to do
382 // limited reads (e.g. about a BAM block!) so seeking can reuse the
383 // existing connection more often.
385 err
= curl_easy_setopt(fp
->easy
, CURLOPT_RESUME_FROM_LARGE
,(curl_off_t
)pos
);
386 if (err
!= CURLE_OK
) { errno
= easy_errno(fp
->easy
, err
); return -1; }
389 fp
->paused
= fp
->finished
= 0;
391 errm
= curl_multi_add_handle(curl
.multi
, fp
->easy
);
392 if (errm
!= CURLM_OK
) { errno
= multi_errno(errm
); return -1; }
395 err
= curl_easy_pause(fp
->easy
, CURLPAUSE_CONT
);
396 if (err
!= CURLE_OK
) { errno
= easy_errno(fp
->easy
, err
); return -1; }
398 while (! fp
->paused
&& ! fp
->finished
)
399 if (wait_perform() < 0) return -1;
401 if (fp
->finished
&& fp
->final_result
!= CURLE_OK
) {
402 errno
= easy_errno(fp
->easy
, fp
->final_result
);
409 static int libcurl_close(hFILE
*fpv
)
411 hFILE_libcurl
*fp
= (hFILE_libcurl
*) fpv
;
416 // Before closing the file, unpause it and perform on it so that uploads
417 // have the opportunity to signal EOF to the server -- see send_callback().
422 err
= curl_easy_pause(fp
->easy
, CURLPAUSE_CONT
);
423 if (err
!= CURLE_OK
) save_errno
= easy_errno(fp
->easy
, err
);
425 while (save_errno
== 0 && ! fp
->paused
&& ! fp
->finished
)
426 if (wait_perform() < 0) save_errno
= errno
;
428 if (fp
->finished
&& fp
->final_result
!= CURLE_OK
)
429 save_errno
= easy_errno(fp
->easy
, fp
->final_result
);
431 errm
= curl_multi_remove_handle(curl
.multi
, fp
->easy
);
432 if (errm
!= CURLM_OK
&& save_errno
== 0) save_errno
= multi_errno(errm
);
435 curl_easy_cleanup(fp
->easy
);
437 if (save_errno
) { errno
= save_errno
; return -1; }
441 static const struct hFILE_backend libcurl_backend
=
443 libcurl_read
, libcurl_write
, libcurl_seek
, NULL
, libcurl_close
447 libcurl_open(const char *url
, const char *modes
, struct curl_slist
*headers
)
456 if ((s
= strpbrk(modes
, "rwa+")) != NULL
) {
458 if (strpbrk(&s
[1], "rwa+")) mode
= 'e';
462 if (mode
!= 'r' && mode
!= 'w') { errno
= EINVAL
; goto early_error
; }
464 fp
= (hFILE_libcurl
*) hfile_init(sizeof (hFILE_libcurl
), modes
, 0);
465 if (fp
== NULL
) goto early_error
;
467 fp
->headers
= headers
;
469 fp
->buffer
.ptr
.rd
= NULL
;
471 fp
->final_result
= (CURLcode
) -1;
472 fp
->paused
= fp
->closing
= fp
->finished
= 0;
474 fp
->easy
= curl_easy_init();
475 if (fp
->easy
== NULL
) { errno
= ENOMEM
; goto error
; }
477 // Make a route to the hFILE_libcurl* given just a CURL* easy handle
478 err
= curl_easy_setopt(fp
->easy
, CURLOPT_PRIVATE
, fp
);
481 err
|= curl_easy_setopt(fp
->easy
, CURLOPT_WRITEFUNCTION
, recv_callback
);
482 err
|= curl_easy_setopt(fp
->easy
, CURLOPT_WRITEDATA
, fp
);
485 struct curl_slist
*list
;
487 err
|= curl_easy_setopt(fp
->easy
, CURLOPT_READFUNCTION
, send_callback
);
488 err
|= curl_easy_setopt(fp
->easy
, CURLOPT_READDATA
, fp
);
489 err
|= curl_easy_setopt(fp
->easy
, CURLOPT_UPLOAD
, 1L);
491 list
= curl_slist_append(fp
->headers
, "Transfer-Encoding: chunked");
492 if (list
) fp
->headers
= list
; else goto error
;
495 err
|= curl_easy_setopt(fp
->easy
, CURLOPT_URL
, url
);
496 err
|= curl_easy_setopt(fp
->easy
, CURLOPT_USERAGENT
, curl
.useragent
.s
);
498 err
|= curl_easy_setopt(fp
->easy
, CURLOPT_HTTPHEADER
, fp
->headers
);
499 err
|= curl_easy_setopt(fp
->easy
, CURLOPT_FOLLOWLOCATION
, 1L);
500 if (hts_verbose
<= 8)
501 err
|= curl_easy_setopt(fp
->easy
, CURLOPT_FAILONERROR
, 1L);
502 if (hts_verbose
>= 8)
503 err
|= curl_easy_setopt(fp
->easy
, CURLOPT_VERBOSE
, 1L);
505 if (err
!= 0) { errno
= ENOSYS
; goto error
; }
507 errm
= curl_multi_add_handle(curl
.multi
, fp
->easy
);
508 if (errm
!= CURLM_OK
) { errno
= multi_errno(errm
); goto error
; }
511 while (! fp
->paused
&& ! fp
->finished
)
512 if (wait_perform() < 0) goto error_remove
;
514 if (fp
->finished
&& fp
->final_result
!= CURLE_OK
) {
515 errno
= easy_errno(fp
->easy
, fp
->final_result
);
521 if (curl_easy_getinfo(fp
->easy
, CURLINFO_CONTENT_LENGTH_DOWNLOAD
,
522 &dval
) == CURLE_OK
&& dval
>= 0.0)
523 fp
->file_size
= (off_t
) (dval
+ 0.1);
526 fp
->base
.backend
= &libcurl_backend
;
531 (void) curl_multi_remove_handle(curl
.multi
, fp
->easy
);
537 if (fp
->easy
) curl_easy_cleanup(fp
->easy
);
538 if (fp
->headers
) curl_slist_free_all(fp
->headers
);
539 hfile_destroy((hFILE
*) fp
);
545 if (headers
) curl_slist_free_all(headers
);
550 static hFILE
*hopen_libcurl(const char *url
, const char *modes
)
552 return libcurl_open(url
, modes
, NULL
);
555 static int parse_va_list(struct curl_slist
**headers
, va_list args
)
559 while ((argtype
= va_arg(args
, const char *)) != NULL
)
560 if (strcmp(argtype
, "httphdr:v") == 0) {
562 for (hdr
= va_arg(args
, const char **); *hdr
; hdr
++) {
563 struct curl_slist
*list
= curl_slist_append(*headers
, *hdr
);
564 if (list
) *headers
= list
; else return -1;
567 else if (strcmp(argtype
, "httphdr:l") == 0) {
569 while ((hdr
= va_arg(args
, const char *)) != NULL
) {
570 struct curl_slist
*list
= curl_slist_append(*headers
, hdr
);
571 if (list
) *headers
= list
; else return -1;
574 else if (strcmp(argtype
, "httphdr") == 0) {
575 const char *hdr
= va_arg(args
, const char *);
577 struct curl_slist
*list
= curl_slist_append(*headers
, hdr
);
578 if (list
) *headers
= list
; else return -1;
581 else if (strcmp(argtype
, "va_list") == 0) {
582 va_list *args2
= va_arg(args
, va_list *);
584 if (parse_va_list(headers
, *args2
) < 0) return -1;
587 else { errno
= EINVAL
; return -1; }
592 static hFILE
*vhopen_libcurl(const char *url
, const char *modes
, va_list args
)
594 struct curl_slist
*headers
= NULL
;
595 if (parse_va_list(&headers
, args
) < 0) {
596 if (headers
) curl_slist_free_all(headers
);
600 return libcurl_open(url
, modes
, headers
);
603 int PLUGIN_GLOBAL(hfile_plugin_init
,_libcurl
)(struct hFILE_plugin
*self
)
605 static const struct hFILE_scheme_handler handler
=
606 { hopen_libcurl
, hfile_always_remote
, "libcurl",
610 #ifdef ENABLE_PLUGINS
611 // Embed version string for examination via strings(1) or what(1)
612 static const char id
[] = "@(#)hfile_libcurl plugin (htslib)\t" HTS_VERSION
;
613 const char *version
= strchr(id
, '\t')+1;
615 const char *version
= hts_version();
617 const curl_version_info_data
*info
;
618 const char * const *protocol
;
621 err
= curl_global_init(CURL_GLOBAL_ALL
);
622 if (err
!= CURLE_OK
) { errno
= easy_errno(NULL
, err
); return -1; }
624 curl
.multi
= curl_multi_init();
625 if (curl
.multi
== NULL
) { curl_global_cleanup(); errno
= EIO
; return -1; }
627 info
= curl_version_info(CURLVERSION_NOW
);
628 ksprintf(&curl
.useragent
, "htslib/%s libcurl/%s", version
, info
->version
);
631 curl
.perform_again
= 0;
632 self
->name
= "libcurl";
633 self
->destroy
= libcurl_exit
;
635 for (protocol
= info
->protocols
; *protocol
; protocol
++)
636 hfile_add_scheme_handler(*protocol
, &handler
);