limit fstBC to 30bp in Python3 ver.
[GalaxyCodeBases.git] / c_cpp / lib / klib / kurl.c
blob3bf92901ca1f54adf22cccdce799eb58b207f512
1 #include <stdio.h>
2 #include <fcntl.h>
3 #include <ctype.h>
4 #include <assert.h>
5 #include <stdint.h>
6 #include <stdlib.h>
7 #include <unistd.h>
8 #include <string.h>
9 #include <curl/curl.h>
10 #include "kurl.h"
/**********************
 *** Core kurl APIs ***
 **********************/
/* Default capacity, in bytes, of the internal read buffer. */
#define KU_DEF_BUFLEN 0x8000

/* Forward seeks of at most this many bytes are served by reading through the
 * stream; anything larger repositions the underlying file or transfer. */
#define KU_MAX_SKIP (KU_DEF_BUFLEN << 1)

/* Non-zero when the handle wraps a local file descriptor (fd >= 0). */
#define kurl_isfile(u) ((u)->fd >= 0)

#ifndef kroundup32
/* Round the integer lvalue x up, in place, to the next power of two;
 * a value that already is a power of two is left unchanged. */
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
25 struct kurl_t {
26 CURLM *multi; // cURL multi handler
27 CURL *curl; // cURL easy handle
28 uint8_t *buf; // buffer
29 off_t off0; // offset of the first byte in the buffer; the actual file offset equals off0 + p_buf
30 int fd; // file descriptor for a normal file; <0 for a remote file
31 int m_buf; // max buffer size; for a remote file, CURL_MAX_WRITE_SIZE*2 is recommended
32 int l_buf; // length of the buffer; l_buf == 0 iff the input read entirely; l_buf <= m_buf
33 int p_buf; // file position in the buffer; p_buf <= l_buf
34 int done_reading; // true if we can read nothing from the file; buffer may not be empty even if done_reading is set
35 int err; // error code
36 struct curl_slist *hdr;
/* Pieces of an S3 request as produced by s3_parse(); each string is
 * heap-allocated and released with free() by the consumer. */
typedef struct {
	char *url;  // URL handed to cURL
	char *date; // "Date: ..." header line
	char *auth; // "Authorization: AWS ..." header line
} s3aux_t;
43 int kurl_init(void) // required for SSL and win32 socket; NOT thread safe
45 return curl_global_init(CURL_GLOBAL_DEFAULT);
/* Release libcurl's global state; the counterpart of kurl_init(). */
void kurl_destroy(void)
{
	curl_global_cleanup();
	return;
}
53 static int prepare(kurl_t *ku, int do_seek)
55 if (kurl_isfile(ku)) {
56 if (do_seek && lseek(ku->fd, ku->off0, SEEK_SET) != ku->off0)
57 return -1;
58 } else { // FIXME: for S3, we need to re-authorize
59 int rc;
60 rc = curl_multi_remove_handle(ku->multi, ku->curl);
61 rc = curl_easy_setopt(ku->curl, CURLOPT_RESUME_FROM, ku->off0);
62 rc = curl_multi_add_handle(ku->multi, ku->curl);
64 ku->p_buf = ku->l_buf = 0; // empty the buffer
65 return 0;
68 static size_t write_cb(char *ptr, size_t size, size_t nmemb, void *data) // callback required by cURL
70 kurl_t *ku = (kurl_t*)data;
71 ssize_t nbytes = size * nmemb;
72 if (nbytes + ku->l_buf > ku->m_buf)
73 return CURL_WRITEFUNC_PAUSE;
74 memcpy(ku->buf + ku->l_buf, ptr, nbytes);
75 ku->l_buf += nbytes;
76 return nbytes;
79 static int fill_buffer(kurl_t *ku) // fill the buffer
81 assert(ku->p_buf == ku->l_buf); // buffer is always used up when fill_buffer() is called; otherwise a bug
82 ku->off0 += ku->l_buf;
83 ku->p_buf = ku->l_buf = 0;
84 if (ku->done_reading) return 0;
85 if (kurl_isfile(ku)) {
86 // The following block is equivalent to "ku->l_buf = read(ku->fd, ku->buf, ku->m_buf)" on Mac.
87 // On Linux, the man page does not specify whether read() guarantees to read ku->m_buf bytes
88 // even if ->fd references a normal file with sufficient remaining bytes.
89 while (ku->l_buf < ku->m_buf) {
90 int l;
91 l = read(ku->fd, ku->buf + ku->l_buf, ku->m_buf - ku->l_buf);
92 if (l == 0) break;
93 ku->l_buf += l;
95 if (ku->l_buf < ku->m_buf) ku->done_reading = 1;
96 } else {
97 int n_running, rc;
98 fd_set fdr, fdw, fde;
99 do {
100 int maxfd = -1;
101 long curl_to = -1;
102 struct timeval to;
103 // the following is adaped from docs/examples/fopen.c
104 to.tv_sec = 10, to.tv_usec = 0; // 10 seconds
105 curl_multi_timeout(ku->multi, &curl_to);
106 if (curl_to >= 0) {
107 to.tv_sec = curl_to / 1000;
108 if (to.tv_sec > 1) to.tv_sec = 1;
109 else to.tv_usec = (curl_to % 1000) * 1000;
111 FD_ZERO(&fdr); FD_ZERO(&fdw); FD_ZERO(&fde);
112 curl_multi_fdset(ku->multi, &fdr, &fdw, &fde, &maxfd); // FIXME: check return code
113 if (maxfd >= 0 && (rc = select(maxfd+1, &fdr, &fdw, &fde, &to)) < 0) break;
114 if (maxfd < 0) { // check curl_multi_fdset.3 about why we wait for 100ms here
115 struct timespec req, rem;
116 req.tv_sec = 0; req.tv_nsec = 100000000; // this is 100ms
117 nanosleep(&req, &rem);
119 curl_easy_pause(ku->curl, CURLPAUSE_CONT);
120 rc = curl_multi_perform(ku->multi, &n_running); // FIXME: check return code
121 } while (n_running && ku->l_buf < ku->m_buf - CURL_MAX_WRITE_SIZE);
122 if (ku->l_buf < ku->m_buf - CURL_MAX_WRITE_SIZE) ku->done_reading = 1;
124 return ku->l_buf;
127 int kurl_close(kurl_t *ku)
129 if (ku == 0) return 0;
130 if (ku->fd < 0) {
131 curl_multi_remove_handle(ku->multi, ku->curl);
132 curl_easy_cleanup(ku->curl);
133 curl_multi_cleanup(ku->multi);
134 if (ku->hdr) curl_slist_free_all(ku->hdr);
135 } else close(ku->fd);
136 free(ku->buf);
137 free(ku);
138 return 0;
141 kurl_t *kurl_open(const char *url, kurl_opt_t *opt)
143 extern s3aux_t s3_parse(const char *url, const char *_id, const char *_secret, const char *fn);
144 const char *p, *q;
145 kurl_t *ku;
146 int fd = -1, is_file = 1, failed = 0;
148 p = strstr(url, "://");
149 if (p && *p) {
150 for (q = url; q != p; ++q)
151 if (!isalnum(*q)) break;
152 if (q == p) is_file = 0;
154 if (is_file && (fd = open(url, O_RDONLY)) < 0) return 0;
156 ku = (kurl_t*)calloc(1, sizeof(kurl_t));
157 ku->fd = is_file? fd : -1;
158 if (!kurl_isfile(ku)) {
159 ku->multi = curl_multi_init();
160 ku->curl = curl_easy_init();
161 if (strstr(url, "s3://") == url) {
162 s3aux_t a;
163 a = s3_parse(url, (opt? opt->s3keyid : 0), (opt? opt->s3secretkey : 0), (opt? opt->s3key_fn : 0));
164 if (a.url == 0 || a.date == 0 || a.auth == 0) {
165 kurl_close(ku);
166 return 0;
168 ku->hdr = curl_slist_append(ku->hdr, a.date);
169 ku->hdr = curl_slist_append(ku->hdr, a.auth);
170 curl_easy_setopt(ku->curl, CURLOPT_URL, a.url);
171 curl_easy_setopt(ku->curl, CURLOPT_HTTPHEADER, ku->hdr);
172 free(a.date); free(a.auth); free(a.url);
173 } else curl_easy_setopt(ku->curl, CURLOPT_URL, url);
174 curl_easy_setopt(ku->curl, CURLOPT_WRITEDATA, ku);
175 curl_easy_setopt(ku->curl, CURLOPT_VERBOSE, 0L);
176 curl_easy_setopt(ku->curl, CURLOPT_NOSIGNAL, 1L);
177 curl_easy_setopt(ku->curl, CURLOPT_WRITEFUNCTION, write_cb);
178 curl_easy_setopt(ku->curl, CURLOPT_SSL_VERIFYPEER, 0L);
179 curl_easy_setopt(ku->curl, CURLOPT_SSL_VERIFYHOST, 0L);
180 curl_easy_setopt(ku->curl, CURLOPT_FOLLOWLOCATION, 1L);
182 ku->m_buf = KU_DEF_BUFLEN;
183 if (!kurl_isfile(ku) && ku->m_buf < CURL_MAX_WRITE_SIZE * 2)
184 ku->m_buf = CURL_MAX_WRITE_SIZE * 2; // for remote files, the buffer set to 2*CURL_MAX_WRITE_SIZE
185 ku->buf = (uint8_t*)calloc(ku->m_buf, 1);
186 if (kurl_isfile(ku)) failed = (fill_buffer(ku) <= 0);
187 else failed = (prepare(ku, 0) < 0 || fill_buffer(ku) <= 0);
188 if (failed) {
189 kurl_close(ku);
190 return 0;
192 return ku;
195 kurl_t *kurl_dopen(int fd)
197 kurl_t *ku;
198 ku = (kurl_t*)calloc(1, sizeof(kurl_t));
199 ku->fd = fd;
200 ku->m_buf = KU_DEF_BUFLEN;
201 ku->buf = (uint8_t*)calloc(ku->m_buf, 1);
202 if (prepare(ku, 0) < 0 || fill_buffer(ku) <= 0) {
203 kurl_close(ku);
204 return 0;
206 return ku;
209 int kurl_buflen(kurl_t *ku, int len)
211 if (len <= 0 || len < ku->l_buf) return ku->m_buf;
212 if (!kurl_isfile(ku) && len < CURL_MAX_WRITE_SIZE * 2) return ku->m_buf;
213 ku->m_buf = len;
214 kroundup32(ku->m_buf);
215 ku->buf = (uint8_t*)realloc(ku->buf, ku->m_buf);
216 return ku->m_buf;
219 ssize_t kurl_read(kurl_t *ku, void *buf, size_t nbytes)
221 ssize_t rest = nbytes;
222 if (ku->l_buf == 0) return 0; // end-of-file
223 while (rest) {
224 if (ku->l_buf - ku->p_buf >= rest) {
225 if (buf) memcpy((uint8_t*)buf + (nbytes - rest), ku->buf + ku->p_buf, rest);
226 ku->p_buf += rest;
227 rest = 0;
228 } else {
229 int ret;
230 if (buf && ku->l_buf > ku->p_buf)
231 memcpy((uint8_t*)buf + (nbytes - rest), ku->buf + ku->p_buf, ku->l_buf - ku->p_buf);
232 rest -= ku->l_buf - ku->p_buf;
233 ku->p_buf = ku->l_buf;
234 ret = fill_buffer(ku);
235 if (ret <= 0) break;
238 return nbytes - rest;
241 off_t kurl_seek(kurl_t *ku, off_t offset, int whence) // FIXME: sometimes when seek() fails, read() will fail as well.
243 off_t new_off = -1, cur_off;
244 int failed = 0, seek_end = 0;
245 if (ku == 0) return -1;
246 cur_off = ku->off0 + ku->p_buf;
247 if (whence == SEEK_SET) new_off = offset;
248 else if (whence == SEEK_CUR) new_off += cur_off + offset;
249 else if (whence == SEEK_END && kurl_isfile(ku)) new_off = lseek(ku->fd, offset, SEEK_END), seek_end = 1;
250 else { // not supported whence
251 ku->err = KURL_INV_WHENCE;
252 return -1;
254 if (new_off < 0) { // negtive absolute offset
255 ku->err = KURL_SEEK_OUT;
256 return -1;
258 if (!seek_end && new_off >= cur_off && new_off - cur_off + ku->p_buf < ku->l_buf) {
259 ku->p_buf += new_off - cur_off;
260 return ku->off0 + ku->p_buf;
262 if (seek_end || new_off < cur_off || new_off - cur_off > KU_MAX_SKIP) { // if jump is large, do actual seek
263 ku->off0 = new_off;
264 ku->done_reading = 0;
265 if (prepare(ku, 1) < 0 || fill_buffer(ku) <= 0) failed = 1;
266 } else { // if jump is small, read through
267 off_t r;
268 r = kurl_read(ku, 0, new_off - cur_off);
269 if (r + cur_off != new_off) failed = 1; // out of range
271 if (failed) ku->err = KURL_SEEK_OUT, ku->l_buf = ku->p_buf = 0, new_off = -1;
272 return new_off;
275 off_t kurl_tell(const kurl_t *ku)
277 if (ku == 0) return -1;
278 return ku->off0 + ku->p_buf;
281 int kurl_eof(const kurl_t *ku)
283 if (ku == 0) return 1;
284 return (ku->l_buf == 0); // unless file end, buffer should never be empty
287 int kurl_fileno(const kurl_t *ku)
289 if (ku == 0) return -1;
290 return ku->fd;
293 int kurl_error(const kurl_t *ku)
295 if (ku == 0) return KURL_NULL;
296 return ku->err;
/*****************
 *** HMAC-SHA1 ***
 *****************/

/* This code is public-domain - it is based on libcrypt placed in the public domain by Wei Dai and other contributors. */
#define HASH_LENGTH 20  /* size of a SHA-1 digest in bytes */
#define BLOCK_LENGTH 64 /* size of a SHA-1 input block in bytes */

/* Working state of a SHA-1 / HMAC-SHA1 computation. */
typedef struct sha1nfo {
	union {
		uint8_t b[BLOCK_LENGTH];
		uint32_t w[BLOCK_LENGTH/4];
	} buf;                          // current input block, viewed as bytes or as 32-bit words
	uint8_t bufOffset;              // number of bytes already stored in buf
	union {
		uint8_t b[HASH_LENGTH];
		uint32_t w[HASH_LENGTH/4];
	} state;                        // running hash state; holds the digest after sha1_final()
	uint32_t byteCount;             // number of message bytes written so far
	uint8_t keyBuffer[BLOCK_LENGTH]; // HMAC key, zero-padded to one block
	uint8_t innerHash[HASH_LENGTH];  // digest of the inner HMAC pass
} sha1nfo;
317 void sha1_init(sha1nfo *s)
319 const uint8_t table[] = { 0x01,0x23,0x45,0x67, 0x89,0xab,0xcd,0xef, 0xfe,0xdc,0xba,0x98, 0x76,0x54,0x32,0x10, 0xf0,0xe1,0xd2,0xc3 };
320 memcpy(s->state.b, table, HASH_LENGTH);
321 s->byteCount = 0;
322 s->bufOffset = 0;
325 #define rol32(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
327 static void sha1_hashBlock(sha1nfo *s)
329 uint32_t i, t, a = s->state.w[0], b = s->state.w[1], c = s->state.w[2], d = s->state.w[3], e = s->state.w[4];
330 for (i = 0; i < 80; i++) {
331 if (i >= 16) {
332 t = s->buf.w[(i+13)&15] ^ s->buf.w[(i+8)&15] ^ s->buf.w[(i+2)&15] ^ s->buf.w[i&15];
333 s->buf.w[i&15] = rol32(t, 1);
335 if (i < 20) t = 0x5a827999 + (d ^ (b & (c ^ d)));
336 else if (i < 40) t = 0x6ed9eba1 + (b ^ c ^ d);
337 else if (i < 60) t = 0x8f1bbcdc + ((b & c) | (d & (b | c)));
338 else t = 0xca62c1d6 + (b ^ c ^ d);
339 t += rol32(a, 5) + e + s->buf.w[i&15];
340 e = d; d = c; c = rol32(b, 30); b = a; a = t;
342 s->state.w[0] += a; s->state.w[1] += b; s->state.w[2] += c; s->state.w[3] += d; s->state.w[4] += e;
345 static inline void sha1_add(sha1nfo *s, uint8_t data)
347 s->buf.b[s->bufOffset ^ 3] = data;
348 if (++s->bufOffset == BLOCK_LENGTH) {
349 sha1_hashBlock(s);
350 s->bufOffset = 0;
354 void sha1_write1(sha1nfo *s, uint8_t data)
356 ++s->byteCount;
357 sha1_add(s, data);
360 void sha1_write(sha1nfo *s, const char *data, size_t len)
362 while (len--) sha1_write1(s, (uint8_t)*data++);
365 const uint8_t *sha1_final(sha1nfo *s)
367 int i;
368 sha1_add(s, 0x80);
369 while (s->bufOffset != 56) sha1_add(s, 0);
370 sha1_add(s, 0);
371 sha1_add(s, 0);
372 sha1_add(s, 0);
373 sha1_add(s, s->byteCount >> 29);
374 sha1_add(s, s->byteCount >> 21);
375 sha1_add(s, s->byteCount >> 13);
376 sha1_add(s, s->byteCount >> 5);
377 sha1_add(s, s->byteCount << 3);
378 for (i = 0; i < 5; ++i) {
379 uint32_t a = s->state.w[i];
380 s->state.w[i] = a<<24 | (a<<8&0x00ff0000) | (a>>8&0x0000ff00) | a>>24;
382 return s->state.b;
385 #define HMAC_IPAD 0x36
386 #define HMAC_OPAD 0x5c
388 void sha1_init_hmac(sha1nfo *s, const uint8_t* key, int l_key)
390 uint8_t i;
391 memset(s->keyBuffer, 0, BLOCK_LENGTH);
392 if (l_key > BLOCK_LENGTH) {
393 sha1_init(s);
394 while (l_key--) sha1_write1(s, *key++);
395 memcpy(s->keyBuffer, sha1_final(s), HASH_LENGTH);
396 } else memcpy(s->keyBuffer, key, l_key);
397 sha1_init(s);
398 for (i = 0; i < BLOCK_LENGTH; ++i)
399 sha1_write1(s, s->keyBuffer[i] ^ HMAC_IPAD);
402 const uint8_t *sha1_final_hmac(sha1nfo *s)
404 uint8_t i;
405 memcpy(s->innerHash, sha1_final(s), HASH_LENGTH);
406 sha1_init(s);
407 for (i = 0; i < BLOCK_LENGTH; ++i) sha1_write1(s, s->keyBuffer[i] ^ HMAC_OPAD);
408 for (i = 0; i < HASH_LENGTH; ++i) sha1_write1(s, s->innerHash[i]);
409 return sha1_final(s);
/*******************
 *** S3 protocol ***
 *******************/
416 #include <time.h>
417 #include <ctype.h>
419 static void s3_sign(const char *key, const char *data, char out[29])
421 const char *b64tab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
422 const uint8_t *digest;
423 int i, j, rest;
424 sha1nfo s;
425 sha1_init_hmac(&s, (uint8_t*)key, strlen(key));
426 sha1_write(&s, data, strlen(data));
427 digest = sha1_final_hmac(&s);
428 for (j = i = 0, rest = 8; i < 20; ++j) { // base64 encoding
429 if (rest <= 6) {
430 int next = i < 19? digest[i+1] : 0;
431 out[j] = b64tab[(int)(digest[i] << (6-rest) & 0x3f) | next >> (rest+2)], ++i, rest += 2;
432 } else out[j] = b64tab[(int)digest[i] >> (rest-6) & 0x3f], rest -= 6;
434 out[j++] = '='; out[j] = 0; // SHA1 digest always has 160 bits, or 20 bytes. We need one '=' at the end.
/*
 * Read AWS credentials from a two-line text file: the key id on the first
 * line and the secret key on the second.
 *
 * fn  Path of the credential file; when null, "$HOME/.awssecret" is used.
 *
 * Only the first 127 bytes of the file are examined.
 *
 * Returns a malloc()ed buffer laid out as "<id>\0<secret>\0", so the secret
 * starts at result + strlen(result) + 1, and the caller must free() it.
 * Returns null if HOME is unset, the file cannot be opened, the first line
 * has no terminating newline, or memory runs out.
 */
static char *s3_read_awssecret(const char *fn)
{
	static const char default_name[] = "/.awssecret";
	char buf[128], *p, *ret;
	FILE *fp;
	size_t l;
	if (fn == 0) {
		const char *home = getenv("HOME");
		char *path;
		if (home == 0) return 0;
		path = (char*)malloc(strlen(home) + sizeof(default_name));
		if (path == 0) return 0;
		strcat(strcpy(path, home), default_name);
		fp = fopen(path, "r");
		free(path);
	} else fp = fopen(fn, "r");
	if (fp == 0) return 0;
	l = fread(buf, 1, sizeof(buf) - 1, fp);
	fclose(fp);
	buf[l] = 0;
	for (p = buf; *p != 0 && *p != '\n'; ++p); // end of the id line
	if (*p == 0) return 0; // no second line, hence no secret
	*p = 0;
	for (++p; *p != 0 && *p != '\n'; ++p); // end of the secret line
	*p = 0;
	l = p - buf + 1; // both strings including their terminators
	ret = (char*)malloc(l);
	if (ret != 0) memcpy(ret, buf, l);
	return ret;
}
/* Minimal growable string used to assemble URLs and header lines. */
typedef struct {
	int l;   // current length, not counting the terminating NUL
	int m;   // allocated capacity of s
	char *s; // heap buffer, or null before the first append
} kstring_t;
469 static inline int kputsn(const char *p, int l, kstring_t *s)
471 if (s->l + l + 1 >= s->m) {
472 s->m = s->l + l + 2;
473 kroundup32(s->m);
474 s->s = (char*)realloc(s->s, s->m);
476 memcpy(s->s + s->l, p, l);
477 s->l += l;
478 s->s[s->l] = 0;
479 return l;
482 s3aux_t s3_parse(const char *url, const char *_id, const char *_secret, const char *fn_secret)
484 const char *id, *secret, *bucket, *obj;
485 char *id_secret = 0, date[64], sig[29];
486 time_t t;
487 struct tm tmt;
488 s3aux_t a = {0,0};
489 kstring_t str = {0,0,0};
490 // parse URL
491 if (strstr(url, "s3://") != url) return a;
492 bucket = url + 5;
493 for (obj = bucket; *obj && *obj != '/'; ++obj);
494 if (*obj == 0) return a; // no object
495 // acquire AWS credential and time
496 if (_id == 0 || _secret == 0) {
497 id_secret = s3_read_awssecret(fn_secret);
498 if (id_secret == 0) return a; // fail to read the AWS credential
499 id = id_secret;
500 secret = id_secret + strlen(id) + 1;
501 } else id = _id, secret = _secret;
502 // compose URL for curl
503 kputsn("https://", 8, &str);
504 kputsn(bucket, obj - bucket, &str);
505 kputsn(".s3.amazonaws.com", 17, &str);
506 kputsn(obj, strlen(obj), &str);
507 a.url = str.s;
508 // compose the Date line
509 str.l = str.m = 0; str.s = 0;
510 t = time(0);
511 strftime(date, 64, "%a, %d %b %Y %H:%M:%S +0000", gmtime_r(&t, &tmt));
512 kputsn("Date: ", 6, &str);
513 kputsn(date, strlen(date), &str);
514 a.date = str.s;
515 // compose the string to sign and sign it
516 str.l = str.m = 0; str.s = 0;
517 kputsn("GET\n\n\n", 6, &str);
518 kputsn(date, strlen(date), &str);
519 kputsn("\n", 1, &str);
520 kputsn(bucket-1, strlen(bucket-1), &str);
521 s3_sign(secret, str.s, sig);
522 // compose the Authorization line
523 str.l = 0;
524 kputsn("Authorization: AWS ", 19, &str);
525 kputsn(id, strlen(id), &str);
526 kputsn(":", 1, &str);
527 kputsn(sig, strlen(sig), &str);
528 a.auth = str.s;
529 // printf("curl -H '%s' -H '%s' %s\n", a.date, a.auth, a.url);
530 return a;
/*********************
 *** Main function ***
 *********************/
537 #ifdef KURL_MAIN
538 int main(int argc, char *argv[])
540 kurl_t *f;
541 int c, l, l_buf = 0x10000;
542 off_t start = 0, rest = -1;
543 uint8_t *buf;
544 char *p;
545 kurl_opt_t opt;
547 memset(&opt, 0, sizeof(kurl_opt_t));
548 while ((c = getopt(argc, argv, "c:l:a:")) >= 0) {
549 if (c == 'c') start = strtol(optarg, &p, 0);
550 else if (c == 'l') rest = strtol(optarg, &p, 0);
551 else if (c == 'a') opt.s3key_fn = optarg;
553 if (optind == argc) {
554 fprintf(stderr, "Usage: kurl [-c start] [-l length] <url>\n");
555 return 1;
557 kurl_init();
558 f = kurl_open(argv[optind], &opt);
559 if (f == 0) {
560 fprintf(stderr, "ERROR: fail to open URL\n");
561 return 2;
563 if (start > 0) {
564 if (kurl_seek(f, start, SEEK_SET) < 0) {
565 kurl_close(f);
566 fprintf(stderr, "ERROR: fail to seek\n");
567 return 3;
570 buf = (uint8_t*)calloc(l_buf, 1);
571 while (rest != 0) {
572 int to_read = rest > 0 && rest < l_buf? rest : l_buf;
573 l = kurl_read(f, buf, to_read);
574 if (l == 0) break;
575 fwrite(buf, 1, l, stdout);
576 rest -= l;
578 free(buf);
579 kurl_close(f);
580 kurl_destroy();
581 return 0;
583 #endif