1 /* hfile_s3.c -- Amazon S3 backend for low-level file streams.
3 Copyright (C) 2015-2017 Genome Research Ltd.
5 Author: John Marshall <jm18@sanger.ac.uk>
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
33 #include "hts_internal.h"
34 #include "hfile_internal.h"
38 #include "htslib/hts.h" // for hts_version() and hts_verbose
39 #include "htslib/kstring.h"
// HMAC-SHA1 signing backend built on Apple CommonCrypto; this branch is
// compiled when HAVE_COMMONCRYPTO is defined.
41 #if defined HAVE_COMMONCRYPTO
43 #include <CommonCrypto/CommonHMAC.h>
// Size of the digest buffer that callers hand to s3_sign().
45 #define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH
/* s3_sign() -- compute the HMAC-SHA1 of a message (CommonCrypto variant).
     digest   output buffer that receives the raw MAC bytes
     key      signing key: key->s holds the bytes, key->l their count
     message  data to sign: message->s holds the bytes, message->l their count
   Returns the digest length, CC_SHA1_DIGEST_LENGTH.  */
48 s3_sign(unsigned char *digest
, kstring_t
*key
, kstring_t
*message
)
// One-shot HMAC over the whole message using the SHA1 algorithm selector.
50 CCHmac(kCCHmacAlgSHA1
, key
->s
, key
->l
, message
->s
, message
->l
, digest
);
51 return CC_SHA1_DIGEST_LENGTH
;
// HMAC-SHA1 signing backend built on OpenSSL; this branch is compiled when
// HAVE_COMMONCRYPTO is not defined but HAVE_HMAC is.
54 #elif defined HAVE_HMAC
56 #include <openssl/hmac.h>
// Size of the digest buffer that callers hand to s3_sign().
58 #define DIGEST_BUFSIZ EVP_MAX_MD_SIZE
/* s3_sign() -- compute the HMAC-SHA1 of a message (OpenSSL variant).
     digest   output buffer that receives the raw MAC bytes
     key      signing key: key->s holds the bytes, key->l their count
     message  data to sign: message->s holds the bytes, message->l their count
   HMAC() stores the digest length through the address of len.
   NOTE(review): the declaration of len and the return statement are not
   visible here; presumably len is what gets returned -- confirm.  */
61 s3_sign(unsigned char *digest
, kstring_t
*key
, kstring_t
*message
)
// EVP_sha1() selects SHA1 as the underlying hash.
64 HMAC(EVP_sha1(), key
->s
, key
->l
,
65 (unsigned char *) message
->s
, message
->l
, digest
, &len
);
// Stop the build with a clear message when no HMAC implementation exists.
70 #error No HMAC() routine found by configure
/* urldecode_kput() -- append a URL-encoded span to a kstring, decoding
   percent escapes.
     s    start of the encoded text (need not be NUL-terminated)
     len  number of characters of s to consume
     str  destination kstring; decoded characters are appended with kputc()
   NOTE(review): the loop header and the declarations of buf and i are not
   visible here, nor is the index update after an escape -- confirm.  */
74 urldecode_kput(const char *s
, int len
, kstring_t
*str
)
// A percent sign only counts as an escape when two more characters lie
// within len; otherwise it falls through to the literal copy below.
80 if (s
[i
] == '%' && i
+2 < len
) {
// Copy the two characters after the percent sign into a NUL-terminated
// scratch string so that strtol() can parse them.
81 buf
[0] = s
[i
+1], buf
[1] = s
[i
+2], buf
[2] = '\0';
// Parse as base 16 and append the resulting value as a single character.
82 kputc(strtol(buf
, NULL
, 16), str
);
// Any other character is copied through unchanged.
85 else kputc(s
[i
++], str
);
/* base64_kput() -- append the base64 encoding of a byte buffer to a kstring.
     data  bytes to encode
     len   number of bytes in data
     str   destination kstring; output characters are appended  */
88 static void base64_kput(const unsigned char *data
, size_t len
, kstring_t
*str
)
// Encoding alphabet, indexed by 6-bit value.
90 static const char base64
[] =
91 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// bits tracks how many not-yet-emitted bits are held in x.
// NOTE(review): how pad is incremented is not visible here -- confirm.
95 int bits
= 0, pad
= 0;
// Keep going while input bytes remain or buffered bits are still pending.
97 while (bits
|| i
< len
) {
// Pull the next input byte into the low end of the bit buffer.
100 if (i
< len
) x
|= data
[i
++];
// Emit the alphabet character for the 6 bits starting at position bits.
105 kputc(base64
[(x
>> bits
) & 63], str
);
// Append pad equals signs (the first pad characters of the literal).
109 kputsn("==", pad
, str
);
/* is_dns_compliant() -- test whether the text in [s0, slim) is acceptable as
   a DNS-style name.
     s0    first character of the candidate name
     slim  one past its last character
   Returns non-zero when the name passes every check, 0 otherwise.
   NOTE(review): the first branch of the if chain and any final else are not
   visible here, so the full accepted character set cannot be stated.  */
112 static int is_dns_compliant(const char *s0
, const char *slim
)
114 int has_nondigit
= 0, len
= 0;
// Walk the span one character at a time, counting its length in len.
117 for (s
= s0
; s
< slim
; len
++, s
++)
120 else if (*s
== '-') {
// A hyphen may not be the first or the last character.
122 if (s
== s0
|| s
+1 == slim
) return 0;
// Digits are accepted.
124 else if (isdigit_c(*s
))
126 else if (*s
== '.') {
// A dot must have an alphanumeric character immediately before it ...
127 if (s
== s0
|| ! isalnum_c(s
[-1])) return 0;
// ... and immediately after it.
128 if (s
+1 == slim
|| ! isalnum_c(s
[1])) return 0;
// Require has_nondigit to be set and a length between 3 and 63 inclusive.
132 return has_nondigit
&& len
>= 3 && len
<= 63;
/* expand_tilde_open() -- fopen() a file, expanding a leading tilde-slash to
   the value of the HOME environment variable.
     fname  file name, optionally beginning with tilde-slash
     mode   fopen() mode string
   Returns NULL when expansion is needed but HOME is unset.
   NOTE(review): the final return and any release of full_fname are not
   visible here -- confirm.  */
135 static FILE *expand_tilde_open(const char *fname
, const char *mode
)
// Only the exact two-character prefix tilde-slash triggers expansion.
139 if (strncmp(fname
, "~/", 2) == 0) {
140 kstring_t full_fname
= { 0, 0, NULL
};
141 const char *home
= getenv("HOME");
142 if (! home
) return NULL
;
// Build HOME followed by fname with its tilde dropped (the slash is kept).
144 kputs(home
, &full_fname
);
145 kputs(&fname
[1], &full_fname
);
147 fp
= fopen(full_fname
.s
, mode
);
// No expansion needed: open the name exactly as given.
151 fp
= fopen(fname
, mode
);
/* parse_ini() -- read selected settings from an INI-style file.
     fname    file to read; opened via expand_tilde_open()
     section  name of the section whose settings should be used
     ...      pairs of (const char *key, kstring_t *var), ended by a NULL key
   For each setting line in an active section, the first matching key has the
   value appended to its kstring.  Returns silently if the file cannot be
   opened.
   NOTE(review): the lines that NUL-terminate the section name and the key,
   and the cleanup of args, line and fp, are not visible here -- confirm.  */
156 static void parse_ini(const char *fname
, const char *section
, ...)
158 kstring_t line
= { 0, 0, NULL
};
159 int active
= 1; // Start active, so global properties are accepted
162 FILE *fp
= expand_tilde_open(fname
, "r");
163 if (fp
== NULL
) return;
// Reset the buffer length before each read so every pass sees one line.
165 while (line
.l
= 0, kgetline(&line
, (kgets_func
*) fgets
, fp
) >= 0)
// Section header: a line starting with an opening bracket that also contains
// a closing bracket.
166 if (line
.s
[0] == '[' && (s
= strchr(line
.s
, ']')) != NULL
) {
// Later settings count only while the current section matches the request.
168 active
= (strcmp(&line
.s
[1], section
) == 0);
// Setting line: split at the first colon or equals sign.
170 else if (active
&& (s
= strpbrk(line
.s
, ":=")) != NULL
) {
171 const char *key
= line
.s
, *value
= &s
[1], *akey
;
// Trim whitespace before the key and between the key and the separator.
174 while (isspace_c(*key
)) key
++;
175 while (s
> key
&& isspace_c(s
[-1])) s
--;
// Trim whitespace before the value, then strip it from the end of the line.
178 while (isspace_c(*value
)) value
++;
179 while (line
.l
> 0 && isspace_c(line
.s
[line
.l
-1]))
180 line
.s
[--line
.l
] = '\0';
// Scan the caller-supplied (key, kstring) pairs for this key.
182 va_start(args
, section
);
183 while ((akey
= va_arg(args
, const char *)) != NULL
) {
// Always consume the paired kstring so the argument cursor stays in step.
184 kstring_t
*avar
= va_arg(args
, kstring_t
*);
185 if (strcmp(key
, akey
) == 0) { kputs(value
, avar
); break; }
/* parse_simple() -- read an access id and a secret from a plain text file
   holding two whitespace-separated tokens.
     fname   file to read; opened via expand_tilde_open()
     id      kstring that receives the first token (appended)
     secret  kstring that receives the second token (appended)
   Returns silently if the file cannot be opened.
   NOTE(review): the body of the read loop, the initialisation of s, the step
   that advances s past the first token, and the cleanup are not visible
   here -- confirm.  */
194 static void parse_simple(const char *fname
, kstring_t
*id
, kstring_t
*secret
)
196 kstring_t text
= { 0, 0, NULL
};
200 FILE *fp
= expand_tilde_open(fname
, "r");
201 if (fp
== NULL
) return;
// NOTE(review): text.l is not reset in this loop condition, so successive
// lines appear to accumulate in one buffer -- confirm against the loop body.
203 while (kgetline(&text
, (kgets_func
*) fgets
, fp
) >= 0)
// First token: skip leading whitespace, then take everything up to the next
// space or tab; its length is kept in len.
208 while (isspace_c(*s
)) s
++;
209 kputsn(s
, len
= strcspn(s
, " \t"), id
);
// Second token: same procedure, stored as the secret.
212 while (isspace_c(*s
)) s
++;
213 kputsn(s
, strcspn(s
, " \t"), secret
);
/* s3_rewrite() -- turn an s3 URL into an http or https URL plus request
   headers, and open the result with hopen().
     s3url  URL of the form s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH
     mode   mode string passed on to hopen(); when it contains the letter r
            the request is signed as GET, otherwise as PUT
     argsp  pointer to the caller's va_list, forwarded to hopen() (s3_open
            passes NULL)
   Credentials may come from the URL, from environment variables, or from
   the configuration files read below.  When both an id and a secret are
   known, an Authorization header is added; otherwise the request is sent
   unsigned.
   NOTE(review): several conditionals, declarations, the cleanup code and the
   return statement are not visible here; the comments below describe only
   the visible statements.  */
218 static hFILE
* s3_rewrite(const char *s3url
, const char *mode
, va_list *argsp
)
220 const char *bucket
, *path
;
// Request headers handed to hopen(); header is the fill cursor.
222 char *header_list
[4], **header
= header_list
;
// message accumulates the string to sign; url the rewritten URL.
224 kstring_t message
= { 0, 0, NULL
};
225 kstring_t url
= { 0, 0, NULL
};
226 kstring_t profile
= { 0, 0, NULL
};
227 kstring_t id
= { 0, 0, NULL
};
228 kstring_t secret
= { 0, 0, NULL
};
229 kstring_t host_base
= { 0, 0, NULL
};
230 kstring_t token
= { 0, 0, NULL
};
231 kstring_t token_hdr
= { 0, 0, NULL
};
232 kstring_t auth_hdr
= { 0, 0, NULL
};
234 time_t now
= time(NULL
);
// NOTE(review): two alternative initialisations of tm follow; presumably a
// preprocessor conditional that is not visible here selects one -- confirm.
237 struct tm
*tm
= gmtime_r(&now
, &tm_buffer
);
239 struct tm
*tm
= gmtime(&now
);
// Start the string to sign: HTTP verb, two empty lines, then the date.
242 kputs(strchr(mode
, 'r')? "GET\n" : "PUT\n", &message
);
243 kputc('\n', &message
);
244 kputc('\n', &message
);
245 strftime(date_hdr
, sizeof date_hdr
, "Date: %a, %d %b %Y %H:%M:%S GMT", tm
);
// The Date header is the first request header.
246 *header
++ = date_hdr
;
// Only the timestamp is signed: skip the six-character header name prefix.
247 kputs(&date_hdr
[6], &message
);
248 kputc('\n', &message
);
250 // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH
// With an explicit +SCHEME, copy the scheme and its colon into url.
252 if (s3url
[2] == '+') {
253 bucket
= strchr(s3url
, ':') + 1;
254 kputsn(&s3url
[3], bucket
- &s3url
[3], &url
);
// NOTE(review): presumably the branch for URLs without +SCHEME, which then
// use https -- the else itself is not visible here.
257 kputs("https:", &url
);
// Copy the slashes after the scheme; bucket stops at the first non-slash.
260 while (*bucket
== '/') kputc(*bucket
++, &url
);
// End of the first URL component; an at-sign also terminates it here.
262 path
= bucket
+ strcspn(bucket
, "/?#@");
264 const char *colon
= strpbrk(bucket
, ":@");
// Text up to the first colon or at-sign, URL-decoded, becomes the profile.
266 urldecode_kput(bucket
, colon
- bucket
, &profile
);
269 const char *colon2
= strpbrk(&colon
[1], ":@");
// ID is the text before the first delimiter; SECRET lies between the first
// and second delimiters.
270 urldecode_kput(bucket
, colon
- bucket
, &id
);
271 urldecode_kput(&colon
[1], colon2
- &colon
[1], &secret
);
// TOKEN is whatever lies between the second delimiter and path.
273 urldecode_kput(&colon2
[1], path
- &colon2
[1], &token
);
// End of the bucket name: first slash, question mark or hash after bucket.
277 path
= bucket
+ strcspn(bucket
, "/?#");
280 // If the URL has no ID[:SECRET]@, consider environment variables.
282 if ((v
= getenv("AWS_ACCESS_KEY_ID")) != NULL
) kputs(v
, &id
);
283 if ((v
= getenv("AWS_SECRET_ACCESS_KEY")) != NULL
) kputs(v
, &secret
);
284 if ((v
= getenv("AWS_SESSION_TOKEN")) != NULL
) kputs(v
, &token
);
// Profile name: AWS_DEFAULT_PROFILE takes precedence over AWS_PROFILE, and
// the literal name default is the fallback.
286 if ((v
= getenv("AWS_DEFAULT_PROFILE")) != NULL
) kputs(v
, &profile
);
287 else if ((v
= getenv("AWS_PROFILE")) != NULL
) kputs(v
, &profile
);
288 else kputs("default", &profile
);
// Credentials file: AWS_SHARED_CREDENTIALS_FILE overrides the default path.
292 const char *v
= getenv("AWS_SHARED_CREDENTIALS_FILE");
293 parse_ini(v
? v
: "~/.aws/credentials", profile
.s
,
294 "aws_access_key_id", &id
, "aws_secret_access_key", &secret
,
295 "aws_session_token", &token
, NULL
);
// The .s3cfg file can additionally supply host_base.
298 parse_ini("~/.s3cfg", profile
.s
, "access_key", &id
,
299 "secret_key", &secret
, "access_token", &token
,
300 "host_base", &host_base
, NULL
);
302 parse_simple("~/.awssecret", &id
, &secret
);
// Default endpoint when no configuration supplied host_base.
304 if (host_base
.l
== 0)
305 kputs("s3.amazonaws.com", &host_base
);
306 // Use virtual hosted-style access if possible, otherwise path-style.
307 if (is_dns_compliant(bucket
, path
)) {
// Virtual hosted-style: bucket name first, then host_base.
308 kputsn(bucket
, path
- bucket
, &url
);
310 kputs(host_base
.s
, &url
);
// Path-style: host_base first, then the bucket name.
313 kputs(host_base
.s
, &url
);
315 kputsn(bucket
, path
- bucket
, &url
);
// Session token: add it to the string to sign under its lower-case header
// name, and send it as a request header.
320 kputs("x-amz-security-token:", &message
);
321 kputs(token
.s
, &message
);
322 kputc('\n', &message
);
324 kputs("X-Amz-Security-Token: ", &token_hdr
);
325 kputs(token
.s
, &token_hdr
);
326 *header
++ = token_hdr
.s
;
// bucket points into s3url, so this appends the bucket and all that follows.
329 kputc('/', &message
);
330 kputs(bucket
, &message
); // CanonicalizedResource is '/' + bucket + path
332 // If we have no id/secret, we can't sign the request but will
333 // still be able to access public data sets.
334 if (id
.l
> 0 && secret
.l
> 0) {
335 unsigned char digest
[DIGEST_BUFSIZ
];
// Sign the accumulated message using the secret as the key.
336 size_t digest_len
= s3_sign(digest
, &secret
, &message
);
// Header value: the id, a colon, then the base64-encoded signature.
338 kputs("Authorization: AWS ", &auth_hdr
);
339 kputs(id
.s
, &auth_hdr
);
340 kputc(':', &auth_hdr
);
341 base64_kput(digest
, digest_len
, &auth_hdr
);
343 *header
++ = auth_hdr
.s
;
// Open the rewritten URL, forwarding the caller arguments and header list.
347 hFILE
*fp
= hopen(url
.s
, mode
, "va_list", argsp
, "httphdr:v", header_list
,
/* s3_open() -- open an s3 URL without extra arguments.
     url   the s3 URL
     mode  mode string
   Appends a colon to mode and calls s3_rewrite() with a NULL va_list pointer.
   NOTE(review): the release of mode_colon and the return statement are not
   visible here -- confirm.  */
361 static hFILE
*s3_open(const char *url
, const char *mode
)
// Build the colon-terminated form of the mode string.
363 kstring_t mode_colon
= { 0, 0, NULL
};
364 kputs(mode
, &mode_colon
);
365 kputc(':', &mode_colon
);
366 hFILE
*fp
= s3_rewrite(url
, mode_colon
.s
, NULL
);
/* s3_vopen() -- open an s3 URL with caller-supplied variadic arguments.
     url         the s3 URL
     mode_colon  mode string, passed to s3_rewrite() unchanged
     args0       the caller's argument list
   NOTE(review): the declaration of args, the matching va_end and the return
   statement are not visible here -- confirm.  */
371 static hFILE
*s3_vopen(const char *url
, const char *mode_colon
, va_list args0
)
373 // Need to use va_copy() as we can only take the address of an actual
374 // va_list object, not that of a parameter whose type may have decayed.
376 va_copy(args
, args0
);
377 hFILE
*fp
= s3_rewrite(url
, mode_colon
, &args
);
/* Plugin entry point: names the plugin and registers the S3 handler for the
   s3, s3+http and s3+https URL schemes.
     self  plugin descriptor; its name field is set here
   NOTE(review): the end of the handler initialiser, the end of the
   ENABLE_PLUGINS conditional and the return statement are not visible here.  */
382 int PLUGIN_GLOBAL(hfile_plugin_init
,_s3
)(struct hFILE_plugin
*self
)
// Handler table entry: s3_open and s3_vopen are the two open callbacks.
// NOTE(review): 2000 + 50 is presumably a handler priority -- confirm.
384 static const struct hFILE_scheme_handler handler
=
385 { s3_open
, hfile_always_remote
, "Amazon S3", 2000 + 50, s3_vopen
388 #ifdef ENABLE_PLUGINS
389 // Embed version string for examination via strings(1) or what(1)
390 static const char id
[] = "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION
;
// At verbosity 9 or above, report the part of id that follows the tab.
391 if (hts_verbose
>= 9)
392 fprintf(stderr
, "[M::hfile_s3.init] version %s\n", strchr(id
, '\t')+1);
395 self
->name
= "Amazon S3";
// All three schemes share the same handler.
396 hfile_add_scheme_handler("s3", &handler
);
397 hfile_add_scheme_handler("s3+http", &handler
);
398 hfile_add_scheme_handler("s3+https", &handler
);