/**********************
 *** Core kurl APIs ***
 **********************/
/* Default capacity, in bytes, of the read buffer. */
#define KU_DEF_BUFLEN 0x8000
/* Forward seeks no longer than this (twice the default buffer) are served by reading ahead. */
#define KU_MAX_SKIP (KU_DEF_BUFLEN << 1)

/* Non-zero when the handle wraps a local file descriptor (fd >= 0). */
#define kurl_isfile(u) ((u)->fd >= 0)

/* Round a 32-bit integer lvalue up, in place, to the next power of two. */
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
26 CURLM
*multi
; // cURL multi handler
27 CURL
*curl
; // cURL easy handle
28 uint8_t *buf
; // buffer
29 off_t off0
; // offset of the first byte in the buffer; the actual file offset equals off0 + p_buf
30 int fd
; // file descriptor for a normal file; <0 for a remote file
31 int m_buf
; // max buffer size; for a remote file, CURL_MAX_WRITE_SIZE*2 is recommended
32 int l_buf
; // length of the buffer; l_buf == 0 iff the input read entirely; l_buf <= m_buf
33 int p_buf
; // file position in the buffer; p_buf <= l_buf
34 int done_reading
; // true if we can read nothing from the file; buffer may not be empty even if done_reading is set
35 int err
; // error code
36 struct curl_slist
*hdr
;
/* Result of s3_parse(): the request URL and the two header lines built for it. */
typedef struct {
	char *url;
	char *date;
	char *auth;
} s3aux_t;
43 int kurl_init(void) // required for SSL and win32 socket; NOT thread safe
45 return curl_global_init(CURL_GLOBAL_DEFAULT
);
/* Undo kurl_init() by releasing libcurl's global state. */
void kurl_destroy(void)
{
	curl_global_cleanup();
	return;
}
53 static int prepare(kurl_t
*ku
, int do_seek
)
55 if (kurl_isfile(ku
)) {
56 if (do_seek
&& lseek(ku
->fd
, ku
->off0
, SEEK_SET
) != ku
->off0
)
58 } else { // FIXME: for S3, we need to re-authorize
60 rc
= curl_multi_remove_handle(ku
->multi
, ku
->curl
);
61 rc
= curl_easy_setopt(ku
->curl
, CURLOPT_RESUME_FROM
, ku
->off0
);
62 rc
= curl_multi_add_handle(ku
->multi
, ku
->curl
);
64 ku
->p_buf
= ku
->l_buf
= 0; // empty the buffer
68 static size_t write_cb(char *ptr
, size_t size
, size_t nmemb
, void *data
) // callback required by cURL
70 kurl_t
*ku
= (kurl_t
*)data
;
71 ssize_t nbytes
= size
* nmemb
;
72 if (nbytes
+ ku
->l_buf
> ku
->m_buf
)
73 return CURL_WRITEFUNC_PAUSE
;
74 memcpy(ku
->buf
+ ku
->l_buf
, ptr
, nbytes
);
79 static int fill_buffer(kurl_t
*ku
) // fill the buffer
81 assert(ku
->p_buf
== ku
->l_buf
); // buffer is always used up when fill_buffer() is called; otherwise a bug
82 ku
->off0
+= ku
->l_buf
;
83 ku
->p_buf
= ku
->l_buf
= 0;
84 if (ku
->done_reading
) return 0;
85 if (kurl_isfile(ku
)) {
86 // The following block is equivalent to "ku->l_buf = read(ku->fd, ku->buf, ku->m_buf)" on Mac.
87 // On Linux, the man page does not specify whether read() guarantees to read ku->m_buf bytes
88 // even if ->fd references a normal file with sufficient remaining bytes.
89 while (ku
->l_buf
< ku
->m_buf
) {
91 l
= read(ku
->fd
, ku
->buf
+ ku
->l_buf
, ku
->m_buf
- ku
->l_buf
);
95 if (ku
->l_buf
< ku
->m_buf
) ku
->done_reading
= 1;
103 // the following is adaped from docs/examples/fopen.c
104 to
.tv_sec
= 10, to
.tv_usec
= 0; // 10 seconds
105 curl_multi_timeout(ku
->multi
, &curl_to
);
107 to
.tv_sec
= curl_to
/ 1000;
108 if (to
.tv_sec
> 1) to
.tv_sec
= 1;
109 else to
.tv_usec
= (curl_to
% 1000) * 1000;
111 FD_ZERO(&fdr
); FD_ZERO(&fdw
); FD_ZERO(&fde
);
112 curl_multi_fdset(ku
->multi
, &fdr
, &fdw
, &fde
, &maxfd
); // FIXME: check return code
113 if (maxfd
>= 0 && (rc
= select(maxfd
+1, &fdr
, &fdw
, &fde
, &to
)) < 0) break;
114 if (maxfd
< 0) { // check curl_multi_fdset.3 about why we wait for 100ms here
115 struct timespec req
, rem
;
116 req
.tv_sec
= 0; req
.tv_nsec
= 100000000; // this is 100ms
117 nanosleep(&req
, &rem
);
119 curl_easy_pause(ku
->curl
, CURLPAUSE_CONT
);
120 rc
= curl_multi_perform(ku
->multi
, &n_running
); // FIXME: check return code
121 } while (n_running
&& ku
->l_buf
< ku
->m_buf
- CURL_MAX_WRITE_SIZE
);
122 if (ku
->l_buf
< ku
->m_buf
- CURL_MAX_WRITE_SIZE
) ku
->done_reading
= 1;
127 int kurl_close(kurl_t
*ku
)
129 if (ku
== 0) return 0;
131 curl_multi_remove_handle(ku
->multi
, ku
->curl
);
132 curl_easy_cleanup(ku
->curl
);
133 curl_multi_cleanup(ku
->multi
);
134 if (ku
->hdr
) curl_slist_free_all(ku
->hdr
);
135 } else close(ku
->fd
);
141 kurl_t
*kurl_open(const char *url
, kurl_opt_t
*opt
)
143 extern s3aux_t
s3_parse(const char *url
, const char *_id
, const char *_secret
, const char *fn
);
146 int fd
= -1, is_file
= 1, failed
= 0;
148 p
= strstr(url
, "://");
150 for (q
= url
; q
!= p
; ++q
)
151 if (!isalnum(*q
)) break;
152 if (q
== p
) is_file
= 0;
154 if (is_file
&& (fd
= open(url
, O_RDONLY
)) < 0) return 0;
156 ku
= (kurl_t
*)calloc(1, sizeof(kurl_t
));
157 ku
->fd
= is_file
? fd
: -1;
158 if (!kurl_isfile(ku
)) {
159 ku
->multi
= curl_multi_init();
160 ku
->curl
= curl_easy_init();
161 if (strstr(url
, "s3://") == url
) {
163 a
= s3_parse(url
, (opt
? opt
->s3keyid
: 0), (opt
? opt
->s3secretkey
: 0), (opt
? opt
->s3key_fn
: 0));
164 if (a
.url
== 0 || a
.date
== 0 || a
.auth
== 0) {
168 ku
->hdr
= curl_slist_append(ku
->hdr
, a
.date
);
169 ku
->hdr
= curl_slist_append(ku
->hdr
, a
.auth
);
170 curl_easy_setopt(ku
->curl
, CURLOPT_URL
, a
.url
);
171 curl_easy_setopt(ku
->curl
, CURLOPT_HTTPHEADER
, ku
->hdr
);
172 free(a
.date
); free(a
.auth
); free(a
.url
);
173 } else curl_easy_setopt(ku
->curl
, CURLOPT_URL
, url
);
174 curl_easy_setopt(ku
->curl
, CURLOPT_WRITEDATA
, ku
);
175 curl_easy_setopt(ku
->curl
, CURLOPT_VERBOSE
, 0L);
176 curl_easy_setopt(ku
->curl
, CURLOPT_NOSIGNAL
, 1L);
177 curl_easy_setopt(ku
->curl
, CURLOPT_WRITEFUNCTION
, write_cb
);
178 curl_easy_setopt(ku
->curl
, CURLOPT_SSL_VERIFYPEER
, 0L);
179 curl_easy_setopt(ku
->curl
, CURLOPT_SSL_VERIFYHOST
, 0L);
180 curl_easy_setopt(ku
->curl
, CURLOPT_FOLLOWLOCATION
, 1L);
182 ku
->m_buf
= KU_DEF_BUFLEN
;
183 if (!kurl_isfile(ku
) && ku
->m_buf
< CURL_MAX_WRITE_SIZE
* 2)
184 ku
->m_buf
= CURL_MAX_WRITE_SIZE
* 2; // for remote files, the buffer set to 2*CURL_MAX_WRITE_SIZE
185 ku
->buf
= (uint8_t*)calloc(ku
->m_buf
, 1);
186 if (kurl_isfile(ku
)) failed
= (fill_buffer(ku
) <= 0);
187 else failed
= (prepare(ku
, 0) < 0 || fill_buffer(ku
) <= 0);
195 kurl_t
*kurl_dopen(int fd
)
198 ku
= (kurl_t
*)calloc(1, sizeof(kurl_t
));
200 ku
->m_buf
= KU_DEF_BUFLEN
;
201 ku
->buf
= (uint8_t*)calloc(ku
->m_buf
, 1);
202 if (prepare(ku
, 0) < 0 || fill_buffer(ku
) <= 0) {
209 int kurl_buflen(kurl_t
*ku
, int len
)
211 if (len
<= 0 || len
< ku
->l_buf
) return ku
->m_buf
;
212 if (!kurl_isfile(ku
) && len
< CURL_MAX_WRITE_SIZE
* 2) return ku
->m_buf
;
214 kroundup32(ku
->m_buf
);
215 ku
->buf
= (uint8_t*)realloc(ku
->buf
, ku
->m_buf
);
219 ssize_t
kurl_read(kurl_t
*ku
, void *buf
, size_t nbytes
)
221 ssize_t rest
= nbytes
;
222 if (ku
->l_buf
== 0) return 0; // end-of-file
224 if (ku
->l_buf
- ku
->p_buf
>= rest
) {
225 if (buf
) memcpy((uint8_t*)buf
+ (nbytes
- rest
), ku
->buf
+ ku
->p_buf
, rest
);
230 if (buf
&& ku
->l_buf
> ku
->p_buf
)
231 memcpy((uint8_t*)buf
+ (nbytes
- rest
), ku
->buf
+ ku
->p_buf
, ku
->l_buf
- ku
->p_buf
);
232 rest
-= ku
->l_buf
- ku
->p_buf
;
233 ku
->p_buf
= ku
->l_buf
;
234 ret
= fill_buffer(ku
);
238 return nbytes
- rest
;
241 off_t
kurl_seek(kurl_t
*ku
, off_t offset
, int whence
) // FIXME: sometimes when seek() fails, read() will fail as well.
243 off_t new_off
= -1, cur_off
;
244 int failed
= 0, seek_end
= 0;
245 if (ku
== 0) return -1;
246 cur_off
= ku
->off0
+ ku
->p_buf
;
247 if (whence
== SEEK_SET
) new_off
= offset
;
248 else if (whence
== SEEK_CUR
) new_off
+= cur_off
+ offset
;
249 else if (whence
== SEEK_END
&& kurl_isfile(ku
)) new_off
= lseek(ku
->fd
, offset
, SEEK_END
), seek_end
= 1;
250 else { // not supported whence
251 ku
->err
= KURL_INV_WHENCE
;
254 if (new_off
< 0) { // negtive absolute offset
255 ku
->err
= KURL_SEEK_OUT
;
258 if (!seek_end
&& new_off
>= cur_off
&& new_off
- cur_off
+ ku
->p_buf
< ku
->l_buf
) {
259 ku
->p_buf
+= new_off
- cur_off
;
260 return ku
->off0
+ ku
->p_buf
;
262 if (seek_end
|| new_off
< cur_off
|| new_off
- cur_off
> KU_MAX_SKIP
) { // if jump is large, do actual seek
264 ku
->done_reading
= 0;
265 if (prepare(ku
, 1) < 0 || fill_buffer(ku
) <= 0) failed
= 1;
266 } else { // if jump is small, read through
268 r
= kurl_read(ku
, 0, new_off
- cur_off
);
269 if (r
+ cur_off
!= new_off
) failed
= 1; // out of range
271 if (failed
) ku
->err
= KURL_SEEK_OUT
, ku
->l_buf
= ku
->p_buf
= 0, new_off
= -1;
275 off_t
kurl_tell(const kurl_t
*ku
)
277 if (ku
== 0) return -1;
278 return ku
->off0
+ ku
->p_buf
;
281 int kurl_eof(const kurl_t
*ku
)
283 if (ku
== 0) return 1;
284 return (ku
->l_buf
== 0); // unless file end, buffer should never be empty
287 int kurl_fileno(const kurl_t
*ku
)
289 if (ku
== 0) return -1;
293 int kurl_error(const kurl_t
*ku
)
295 if (ku
== 0) return KURL_NULL
;
/* This code is public-domain - it is based on libcrypt placed in the public domain by Wei Dai and other contributors. */
#define HASH_LENGTH 20  /* bytes in a SHA-1 digest */
#define BLOCK_LENGTH 64 /* bytes in one SHA-1 input block */

/* Working state of one SHA-1 / HMAC-SHA1 computation. */
typedef struct sha1nfo {
	/* current input block, addressable as bytes or as 32-bit words */
	union {
		uint8_t b[BLOCK_LENGTH];
		uint32_t w[BLOCK_LENGTH/4];
	} buf;
	uint8_t bufOffset;  /* number of bytes currently stored in buf */
	/* running hash value, addressable as bytes or as 32-bit words */
	union {
		uint8_t b[HASH_LENGTH];
		uint32_t w[HASH_LENGTH/4];
	} state;
	uint32_t byteCount; /* message bytes written so far */
	uint8_t keyBuffer[BLOCK_LENGTH]; /* HMAC key, zero-padded to one block */
	uint8_t innerHash[HASH_LENGTH];  /* HMAC inner digest */
} sha1nfo;
317 void sha1_init(sha1nfo
*s
)
319 const uint8_t table
[] = { 0x01,0x23,0x45,0x67, 0x89,0xab,0xcd,0xef, 0xfe,0xdc,0xba,0x98, 0x76,0x54,0x32,0x10, 0xf0,0xe1,0xd2,0xc3 };
320 memcpy(s
->state
.b
, table
, HASH_LENGTH
);
325 #define rol32(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
327 static void sha1_hashBlock(sha1nfo
*s
)
329 uint32_t i
, t
, a
= s
->state
.w
[0], b
= s
->state
.w
[1], c
= s
->state
.w
[2], d
= s
->state
.w
[3], e
= s
->state
.w
[4];
330 for (i
= 0; i
< 80; i
++) {
332 t
= s
->buf
.w
[(i
+13)&15] ^ s
->buf
.w
[(i
+8)&15] ^ s
->buf
.w
[(i
+2)&15] ^ s
->buf
.w
[i
&15];
333 s
->buf
.w
[i
&15] = rol32(t
, 1);
335 if (i
< 20) t
= 0x5a827999 + (d
^ (b
& (c
^ d
)));
336 else if (i
< 40) t
= 0x6ed9eba1 + (b
^ c
^ d
);
337 else if (i
< 60) t
= 0x8f1bbcdc + ((b
& c
) | (d
& (b
| c
)));
338 else t
= 0xca62c1d6 + (b
^ c
^ d
);
339 t
+= rol32(a
, 5) + e
+ s
->buf
.w
[i
&15];
340 e
= d
; d
= c
; c
= rol32(b
, 30); b
= a
; a
= t
;
342 s
->state
.w
[0] += a
; s
->state
.w
[1] += b
; s
->state
.w
[2] += c
; s
->state
.w
[3] += d
; s
->state
.w
[4] += e
;
345 static inline void sha1_add(sha1nfo
*s
, uint8_t data
)
347 s
->buf
.b
[s
->bufOffset
^ 3] = data
;
348 if (++s
->bufOffset
== BLOCK_LENGTH
) {
354 void sha1_write1(sha1nfo
*s
, uint8_t data
)
360 void sha1_write(sha1nfo
*s
, const char *data
, size_t len
)
362 while (len
--) sha1_write1(s
, (uint8_t)*data
++);
365 const uint8_t *sha1_final(sha1nfo
*s
)
369 while (s
->bufOffset
!= 56) sha1_add(s
, 0);
373 sha1_add(s
, s
->byteCount
>> 29);
374 sha1_add(s
, s
->byteCount
>> 21);
375 sha1_add(s
, s
->byteCount
>> 13);
376 sha1_add(s
, s
->byteCount
>> 5);
377 sha1_add(s
, s
->byteCount
<< 3);
378 for (i
= 0; i
< 5; ++i
) {
379 uint32_t a
= s
->state
.w
[i
];
380 s
->state
.w
[i
] = a
<<24 | (a
<<8&0x00ff0000) | (a
>>8&0x0000ff00) | a
>>24;
385 #define HMAC_IPAD 0x36
386 #define HMAC_OPAD 0x5c
388 void sha1_init_hmac(sha1nfo
*s
, const uint8_t* key
, int l_key
)
391 memset(s
->keyBuffer
, 0, BLOCK_LENGTH
);
392 if (l_key
> BLOCK_LENGTH
) {
394 while (l_key
--) sha1_write1(s
, *key
++);
395 memcpy(s
->keyBuffer
, sha1_final(s
), HASH_LENGTH
);
396 } else memcpy(s
->keyBuffer
, key
, l_key
);
398 for (i
= 0; i
< BLOCK_LENGTH
; ++i
)
399 sha1_write1(s
, s
->keyBuffer
[i
] ^ HMAC_IPAD
);
402 const uint8_t *sha1_final_hmac(sha1nfo
*s
)
405 memcpy(s
->innerHash
, sha1_final(s
), HASH_LENGTH
);
407 for (i
= 0; i
< BLOCK_LENGTH
; ++i
) sha1_write1(s
, s
->keyBuffer
[i
] ^ HMAC_OPAD
);
408 for (i
= 0; i
< HASH_LENGTH
; ++i
) sha1_write1(s
, s
->innerHash
[i
]);
409 return sha1_final(s
);
419 static void s3_sign(const char *key
, const char *data
, char out
[29])
421 const char *b64tab
= "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
422 const uint8_t *digest
;
425 sha1_init_hmac(&s
, (uint8_t*)key
, strlen(key
));
426 sha1_write(&s
, data
, strlen(data
));
427 digest
= sha1_final_hmac(&s
);
428 for (j
= i
= 0, rest
= 8; i
< 20; ++j
) { // base64 encoding
430 int next
= i
< 19? digest
[i
+1] : 0;
431 out
[j
] = b64tab
[(int)(digest
[i
] << (6-rest
) & 0x3f) | next
>> (rest
+2)], ++i
, rest
+= 2;
432 } else out
[j
] = b64tab
[(int)digest
[i
] >> (rest
-6) & 0x3f], rest
-= 6;
434 out
[j
++] = '='; out
[j
] = 0; // SHA1 digest always has 160 bits, or 20 bytes. We need one '=' at the end.
/*
 * Read AWS credentials from a two-line text file: access key id on the first
 * line, secret key on the second.
 *
 * `fn` is the file to read; when NULL, "$HOME/.awssecret" is used. Only the
 * first 127 bytes of the file are considered.
 *
 * Returns a malloc'ed buffer laid out as "<id>\0<secret>\0" (caller frees),
 * or 0 if HOME is unset, the file cannot be opened, the first line has no
 * terminating newline, or memory cannot be allocated.
 */
static char *s3_read_awssecret(const char *fn)
{
	static const char dflt[] = "/.awssecret";
	char *p, buf[128], *path, *cred;
	FILE *fp;
	int l;
	if (fn == 0) {
		char *home;
		home = getenv("HOME");
		if (home == 0) return 0;
		path = (char*)malloc(strlen(home) + sizeof(dflt));
		if (path == 0) return 0;
		strcat(strcpy(path, home), dflt);
	} else path = (char*)fn;
	fp = fopen(path, "r");
	if (path != fn) free(path);
	if (fp == 0) return 0;
	l = fread(buf, 1, 127, fp);
	fclose(fp);
	buf[l] = 0;
	for (p = buf; *p != 0 && *p != '\n'; ++p); // end of the id line
	if (*p == 0) return 0;
	*p = 0;
	for (++p; *p != 0 && *p != '\n'; ++p);     // end of the secret line
	*p = 0;
	l = p - buf + 1; // both strings together with their terminators
	cred = (char*)malloc(l);
	if (cred == 0) return 0;
	memcpy(cred, buf, l);
	return cred;
}
/* Growable string: l = length in use, m = allocated capacity, s = NUL-terminated data. */
typedef struct { int l, m; char *s; } kstring_t;

/*
 * Append the `l` bytes at `p` to `s`, growing the storage (to a power of two)
 * when needed and keeping the result NUL-terminated.
 * Returns `l` on success, or -1 if memory cannot be allocated, in which case
 * `s` is left unchanged.
 */
static inline int kputsn(const char *p, int l, kstring_t *s)
{
	if (s->l + l + 1 >= s->m) {
		char *grown;
		int cap = s->l + l + 2;
		kroundup32(cap);
		// keep the old pointer until realloc() is known to have succeeded
		grown = (char*)realloc(s->s, cap);
		if (grown == 0) return -1;
		s->s = grown;
		s->m = cap;
	}
	memcpy(s->s + s->l, p, l);
	s->l += l;
	s->s[s->l] = 0;
	return l;
}
482 s3aux_t
s3_parse(const char *url
, const char *_id
, const char *_secret
, const char *fn_secret
)
484 const char *id
, *secret
, *bucket
, *obj
;
485 char *id_secret
= 0, date
[64], sig
[29];
489 kstring_t str
= {0,0,0};
491 if (strstr(url
, "s3://") != url
) return a
;
493 for (obj
= bucket
; *obj
&& *obj
!= '/'; ++obj
);
494 if (*obj
== 0) return a
; // no object
495 // acquire AWS credential and time
496 if (_id
== 0 || _secret
== 0) {
497 id_secret
= s3_read_awssecret(fn_secret
);
498 if (id_secret
== 0) return a
; // fail to read the AWS credential
500 secret
= id_secret
+ strlen(id
) + 1;
501 } else id
= _id
, secret
= _secret
;
502 // compose URL for curl
503 kputsn("https://", 8, &str
);
504 kputsn(bucket
, obj
- bucket
, &str
);
505 kputsn(".s3.amazonaws.com", 17, &str
);
506 kputsn(obj
, strlen(obj
), &str
);
508 // compose the Date line
509 str
.l
= str
.m
= 0; str
.s
= 0;
511 strftime(date
, 64, "%a, %d %b %Y %H:%M:%S +0000", gmtime_r(&t
, &tmt
));
512 kputsn("Date: ", 6, &str
);
513 kputsn(date
, strlen(date
), &str
);
515 // compose the string to sign and sign it
516 str
.l
= str
.m
= 0; str
.s
= 0;
517 kputsn("GET\n\n\n", 6, &str
);
518 kputsn(date
, strlen(date
), &str
);
519 kputsn("\n", 1, &str
);
520 kputsn(bucket
-1, strlen(bucket
-1), &str
);
521 s3_sign(secret
, str
.s
, sig
);
522 // compose the Authorization line
524 kputsn("Authorization: AWS ", 19, &str
);
525 kputsn(id
, strlen(id
), &str
);
526 kputsn(":", 1, &str
);
527 kputsn(sig
, strlen(sig
), &str
);
529 // printf("curl -H '%s' -H '%s' %s\n", a.date, a.auth, a.url);
/*********************
 *** Main function ***
 *********************/
538 int main(int argc
, char *argv
[])
541 int c
, l
, l_buf
= 0x10000;
542 off_t start
= 0, rest
= -1;
547 memset(&opt
, 0, sizeof(kurl_opt_t
));
548 while ((c
= getopt(argc
, argv
, "c:l:a:")) >= 0) {
549 if (c
== 'c') start
= strtol(optarg
, &p
, 0);
550 else if (c
== 'l') rest
= strtol(optarg
, &p
, 0);
551 else if (c
== 'a') opt
.s3key_fn
= optarg
;
553 if (optind
== argc
) {
554 fprintf(stderr
, "Usage: kurl [-c start] [-l length] <url>\n");
558 f
= kurl_open(argv
[optind
], &opt
);
560 fprintf(stderr
, "ERROR: fail to open URL\n");
564 if (kurl_seek(f
, start
, SEEK_SET
) < 0) {
566 fprintf(stderr
, "ERROR: fail to seek\n");
570 buf
= (uint8_t*)calloc(l_buf
, 1);
572 int to_read
= rest
> 0 && rest
< l_buf
? rest
: l_buf
;
573 l
= kurl_read(f
, buf
, to_read
);
575 fwrite(buf
, 1, l
, stdout
);