modified: src1/input.c
[GalaxyCodeBases.git] / c_cpp / lib / htslib / hfile_s3.c
blob9b23b439dbdbe949effcd3f05f6ad23a448bb080
1 /* hfile_s3.c -- Amazon S3 backend for low-level file streams.
3 Copyright (C) 2015-2017 Genome Research Ltd.
5 Author: John Marshall <jm18@sanger.ac.uk>
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
25 #include <config.h>
27 #include <stdarg.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
33 #include "hts_internal.h"
34 #include "hfile_internal.h"
35 #ifdef ENABLE_PLUGINS
36 #include "version.h"
37 #endif
38 #include "htslib/hts.h" // for hts_version() and hts_verbose
39 #include "htslib/kstring.h"
41 #if defined HAVE_COMMONCRYPTO
43 #include <CommonCrypto/CommonHMAC.h>
45 #define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH
47 static size_t
48 s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
50 CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest);
51 return CC_SHA1_DIGEST_LENGTH;
54 #elif defined HAVE_HMAC
56 #include <openssl/hmac.h>
58 #define DIGEST_BUFSIZ EVP_MAX_MD_SIZE
60 static size_t
61 s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
63 unsigned int len;
64 HMAC(EVP_sha1(), key->s, key->l,
65 (unsigned char *) message->s, message->l, digest, &len);
66 return len;
69 #else
70 #error No HMAC() routine found by configure
71 #endif
73 static void
74 urldecode_kput(const char *s, int len, kstring_t *str)
76 char buf[3];
77 int i = 0;
79 while (i < len)
80 if (s[i] == '%' && i+2 < len) {
81 buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0';
82 kputc(strtol(buf, NULL, 16), str);
83 i += 3;
85 else kputc(s[i++], str);
88 static void base64_kput(const unsigned char *data, size_t len, kstring_t *str)
90 static const char base64[] =
91 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
93 size_t i = 0;
94 unsigned x = 0;
95 int bits = 0, pad = 0;
97 while (bits || i < len) {
98 if (bits < 6) {
99 x <<= 8, bits += 8;
100 if (i < len) x |= data[i++];
101 else pad++;
104 bits -= 6;
105 kputc(base64[(x >> bits) & 63], str);
108 str->l -= pad;
109 kputsn("==", pad, str);
/* Check whether the text in [s0, slim) satisfies these rules:
   - only lower-case letters, digits, '-' and '.' appear;
   - '-' is neither the first nor the last character;
   - each '.' has an alphanumeric character on both sides;
   - at least one character is a letter or '-';
   - the length is between 3 and 63 inclusive.
   Returns 1 if all rules hold, 0 otherwise.  */
static int is_dns_compliant(const char *s0, const char *slim)
{
    const char *p;
    int saw_nondigit = 0;
    int n = 0;

    for (p = s0; p < slim; p++, n++) {
        const int at_start = (p == s0);
        const int at_end = (p + 1 == slim);

        if (islower_c(*p)) {
            saw_nondigit = 1;
        }
        else if (*p == '-') {
            saw_nondigit = 1;
            if (at_start || at_end) return 0;
        }
        else if (isdigit_c(*p)) {
            // Digits are always acceptable and need no further checks.
        }
        else if (*p == '.') {
            // The position tests come first so p[-1] and p[1] are only
            // read when they lie inside the range.
            if (at_start || ! isalnum_c(p[-1])) return 0;
            if (at_end || ! isalnum_c(p[1])) return 0;
        }
        else {
            return 0;
        }
    }

    return saw_nondigit && n >= 3 && n <= 63;
}
135 static FILE *expand_tilde_open(const char *fname, const char *mode)
137 FILE *fp;
139 if (strncmp(fname, "~/", 2) == 0) {
140 kstring_t full_fname = { 0, 0, NULL };
141 const char *home = getenv("HOME");
142 if (! home) return NULL;
144 kputs(home, &full_fname);
145 kputs(&fname[1], &full_fname);
147 fp = fopen(full_fname.s, mode);
148 free(full_fname.s);
150 else
151 fp = fopen(fname, mode);
153 return fp;
156 static void parse_ini(const char *fname, const char *section, ...)
158 kstring_t line = { 0, 0, NULL };
159 int active = 1; // Start active, so global properties are accepted
160 char *s;
162 FILE *fp = expand_tilde_open(fname, "r");
163 if (fp == NULL) return;
165 while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0)
166 if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) {
167 *s = '\0';
168 active = (strcmp(&line.s[1], section) == 0);
170 else if (active && (s = strpbrk(line.s, ":=")) != NULL) {
171 const char *key = line.s, *value = &s[1], *akey;
172 va_list args;
174 while (isspace_c(*key)) key++;
175 while (s > key && isspace_c(s[-1])) s--;
176 *s = '\0';
178 while (isspace_c(*value)) value++;
179 while (line.l > 0 && isspace_c(line.s[line.l-1]))
180 line.s[--line.l] = '\0';
182 va_start(args, section);
183 while ((akey = va_arg(args, const char *)) != NULL) {
184 kstring_t *avar = va_arg(args, kstring_t *);
185 if (strcmp(key, akey) == 0) { kputs(value, avar); break; }
187 va_end(args);
190 fclose(fp);
191 free(line.s);
194 static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret)
196 kstring_t text = { 0, 0, NULL };
197 char *s;
198 size_t len;
200 FILE *fp = expand_tilde_open(fname, "r");
201 if (fp == NULL) return;
203 while (kgetline(&text, (kgets_func *) fgets, fp) >= 0)
204 kputc(' ', &text);
205 fclose(fp);
207 s = text.s;
208 while (isspace_c(*s)) s++;
209 kputsn(s, len = strcspn(s, " \t"), id);
211 s += len;
212 while (isspace_c(*s)) s++;
213 kputsn(s, strcspn(s, " \t"), secret);
215 free(text.s);
218 static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp)
220 const char *bucket, *path;
221 char date_hdr[40];
222 char *header_list[4], **header = header_list;
224 kstring_t message = { 0, 0, NULL };
225 kstring_t url = { 0, 0, NULL };
226 kstring_t profile = { 0, 0, NULL };
227 kstring_t id = { 0, 0, NULL };
228 kstring_t secret = { 0, 0, NULL };
229 kstring_t host_base = { 0, 0, NULL };
230 kstring_t token = { 0, 0, NULL };
231 kstring_t token_hdr = { 0, 0, NULL };
232 kstring_t auth_hdr = { 0, 0, NULL };
234 time_t now = time(NULL);
235 #ifdef HAVE_GMTIME_R
236 struct tm tm_buffer;
237 struct tm *tm = gmtime_r(&now, &tm_buffer);
238 #else
239 struct tm *tm = gmtime(&now);
240 #endif
242 kputs(strchr(mode, 'r')? "GET\n" : "PUT\n", &message);
243 kputc('\n', &message);
244 kputc('\n', &message);
245 strftime(date_hdr, sizeof date_hdr, "Date: %a, %d %b %Y %H:%M:%S GMT", tm);
246 *header++ = date_hdr;
247 kputs(&date_hdr[6], &message);
248 kputc('\n', &message);
250 // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH
252 if (s3url[2] == '+') {
253 bucket = strchr(s3url, ':') + 1;
254 kputsn(&s3url[3], bucket - &s3url[3], &url);
256 else {
257 kputs("https:", &url);
258 bucket = &s3url[3];
260 while (*bucket == '/') kputc(*bucket++, &url);
262 path = bucket + strcspn(bucket, "/?#@");
263 if (*path == '@') {
264 const char *colon = strpbrk(bucket, ":@");
265 if (*colon != ':') {
266 urldecode_kput(bucket, colon - bucket, &profile);
268 else {
269 const char *colon2 = strpbrk(&colon[1], ":@");
270 urldecode_kput(bucket, colon - bucket, &id);
271 urldecode_kput(&colon[1], colon2 - &colon[1], &secret);
272 if (*colon2 == ':')
273 urldecode_kput(&colon2[1], path - &colon2[1], &token);
276 bucket = &path[1];
277 path = bucket + strcspn(bucket, "/?#");
279 else {
280 // If the URL has no ID[:SECRET]@, consider environment variables.
281 const char *v;
282 if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &id);
283 if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &secret);
284 if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &token);
286 if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &profile);
287 else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &profile);
288 else kputs("default", &profile);
291 if (id.l == 0) {
292 const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
293 parse_ini(v? v : "~/.aws/credentials", profile.s,
294 "aws_access_key_id", &id, "aws_secret_access_key", &secret,
295 "aws_session_token", &token, NULL);
297 if (id.l == 0)
298 parse_ini("~/.s3cfg", profile.s, "access_key", &id,
299 "secret_key", &secret, "access_token", &token,
300 "host_base", &host_base, NULL);
301 if (id.l == 0)
302 parse_simple("~/.awssecret", &id, &secret);
304 if (host_base.l == 0)
305 kputs("s3.amazonaws.com", &host_base);
306 // Use virtual hosted-style access if possible, otherwise path-style.
307 if (is_dns_compliant(bucket, path)) {
308 kputsn(bucket, path - bucket, &url);
309 kputc('.', &url);
310 kputs(host_base.s, &url);
312 else {
313 kputs(host_base.s, &url);
314 kputc('/', &url);
315 kputsn(bucket, path - bucket, &url);
317 kputs(path, &url);
319 if (token.l > 0) {
320 kputs("x-amz-security-token:", &message);
321 kputs(token.s, &message);
322 kputc('\n', &message);
324 kputs("X-Amz-Security-Token: ", &token_hdr);
325 kputs(token.s, &token_hdr);
326 *header++ = token_hdr.s;
329 kputc('/', &message);
330 kputs(bucket, &message); // CanonicalizedResource is '/' + bucket + path
332 // If we have no id/secret, we can't sign the request but will
333 // still be able to access public data sets.
334 if (id.l > 0 && secret.l > 0) {
335 unsigned char digest[DIGEST_BUFSIZ];
336 size_t digest_len = s3_sign(digest, &secret, &message);
338 kputs("Authorization: AWS ", &auth_hdr);
339 kputs(id.s, &auth_hdr);
340 kputc(':', &auth_hdr);
341 base64_kput(digest, digest_len, &auth_hdr);
343 *header++ = auth_hdr.s;
346 *header = NULL;
347 hFILE *fp = hopen(url.s, mode, "va_list", argsp, "httphdr:v", header_list,
348 NULL);
349 free(message.s);
350 free(url.s);
351 free(profile.s);
352 free(id.s);
353 free(secret.s);
354 free(host_base.s);
355 free(token.s);
356 free(token_hdr.s);
357 free(auth_hdr.s);
358 return fp;
361 static hFILE *s3_open(const char *url, const char *mode)
363 kstring_t mode_colon = { 0, 0, NULL };
364 kputs(mode, &mode_colon);
365 kputc(':', &mode_colon);
366 hFILE *fp = s3_rewrite(url, mode_colon.s, NULL);
367 free(mode_colon.s);
368 return fp;
371 static hFILE *s3_vopen(const char *url, const char *mode_colon, va_list args0)
373 // Need to use va_copy() as we can only take the address of an actual
374 // va_list object, not that of a parameter whose type may have decayed.
375 va_list args;
376 va_copy(args, args0);
377 hFILE *fp = s3_rewrite(url, mode_colon, &args);
378 va_end(args);
379 return fp;
382 int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self)
384 static const struct hFILE_scheme_handler handler =
385 { s3_open, hfile_always_remote, "Amazon S3", 2000 + 50, s3_vopen
388 #ifdef ENABLE_PLUGINS
389 // Embed version string for examination via strings(1) or what(1)
390 static const char id[] = "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION;
391 if (hts_verbose >= 9)
392 fprintf(stderr, "[M::hfile_s3.init] version %s\n", strchr(id, '\t')+1);
393 #endif
395 self->name = "Amazon S3";
396 hfile_add_scheme_handler("s3", &handler);
397 hfile_add_scheme_handler("s3+http", &handler);
398 hfile_add_scheme_handler("s3+https", &handler);
399 return 0;