modified: src1/input.c
[GalaxyCodeBases.git] / c_cpp / lib / htslib / bgzip.c
blobe078185d787baaacd09807afefeafb7dc5cf3d3e
/*  bgzip.c -- Block compression/decompression utility.

    Copyright (C) 2008, 2009 Broad Institute / Massachusetts Institute of Technology
    Copyright (C) 2010, 2013-2017 Genome Research Ltd.

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
    in the Software without restriction, including without limitation the rights
    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notices and this permission notice shall be included in
    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    THE SOFTWARE.
*/
25 #include <config.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <stdio.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32 #include <errno.h>
33 #include <stdarg.h>
34 #include <getopt.h>
35 #include <sys/stat.h>
36 #include "htslib/bgzf.h"
37 #include "htslib/hts.h"
/* Size in bytes (64 KiB) of the scratch buffer through which data is copied
   between the plain file descriptor and the BGZF stream. */
static const int WINDOW_SIZE = 64 * 1024;
/* Print a printf-style message to stderr, then terminate the process with
   EXIT_FAILURE.  This function does not return. */
static void error(const char *format, ...)
{
    va_list args;

    va_start(args, format);
    vfprintf(stderr, format, args);
    va_end(args);

    exit(EXIT_FAILURE);
}
/* Ask on stderr whether the already-existing file `fn` may be overwritten.

   The question is only asked when standard input is a terminal; otherwise
   the answer is "no".  Returns 1 when the first character read from stdin
   is 'y' or 'Y', and 0 in every other case.

   errno is restored to the value it had on entry, so the caller can still
   report the failure that triggered the prompt. */
static int confirm_overwrite(const char *fn)
{
    const int entry_errno = errno;
    int overwrite = 0;

    if (isatty(STDIN_FILENO)) {
        char reply;

        fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn);
        if (scanf("%c", &reply) == 1) {
            switch (reply) {
            case 'y':
            case 'Y':
                overwrite = 1;
                break;
            default:
                break;
            }
        }
    }

    errno = entry_errno;
    return overwrite;
}
/* Write the version banner and the command-line help to stderr.
   Always returns 1, so callers can use it directly as the exit status. */
static int bgzip_main_usage(void)
{
    /* One entry per option line, in the order they are displayed. */
    static const char *const option_help[] = {
        " -b, --offset INT decompress at virtual file pointer (0-based uncompressed offset)\n",
        " -c, --stdout write on standard output, keep original files unchanged\n",
        " -d, --decompress decompress\n",
        " -f, --force overwrite files without asking\n",
        " -h, --help give this help\n",
        " -i, --index compress and create BGZF index\n",
        " -I, --index-name FILE name of BGZF index file [file.gz.gzi]\n",
        " -r, --reindex (re)index compressed file\n",
        " -g, --rebgzip use an index file to bgzip a file\n",
        " -s, --size INT decompress INT bytes (uncompressed size)\n",
        " -@, --threads INT number of compression threads to use [1]\n",
    };
    const size_t n_lines = sizeof option_help / sizeof option_help[0];
    size_t i;

    fputs("\n", stderr);
    fprintf(stderr, "Version: %s\n", hts_version());
    fputs("Usage: bgzip [OPTIONS] [FILE] ...\n", stderr);
    fputs("Options:\n", stderr);
    for (i = 0; i < n_lines; i++)
        fputs(option_help[i], stderr);
    fputs("\n", stderr);

    return 1;
}
86 int main(int argc, char **argv)
88 int c, compress, pstdout, is_forced, index = 0, rebgzip = 0, reindex = 0;
89 BGZF *fp;
90 void *buffer;
91 long start, end, size;
92 char *index_fname = NULL;
93 int threads = 1;
95 static const struct option loptions[] =
97 {"help", no_argument, NULL, 'h'},
98 {"offset", required_argument, NULL, 'b'},
99 {"stdout", no_argument, NULL, 'c'},
100 {"decompress", no_argument, NULL, 'd'},
101 {"force", no_argument, NULL, 'f'},
102 {"index", no_argument, NULL, 'i'},
103 {"index-name", required_argument, NULL, 'I'},
104 {"reindex", no_argument, NULL, 'r'},
105 {"rebgzip",no_argument,NULL,'g'},
106 {"size", required_argument, NULL, 's'},
107 {"threads", required_argument, NULL, '@'},
108 {"version", no_argument, NULL, 1},
109 {NULL, 0, NULL, 0}
112 compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
113 while((c = getopt_long(argc, argv, "cdh?fb:@:s:iI:gr",loptions,NULL)) >= 0){
114 switch(c){
115 case 'd': compress = 0; break;
116 case 'c': pstdout = 1; break;
117 case 'b': start = atol(optarg); compress = 0; pstdout = 1; break;
118 case 's': size = atol(optarg); pstdout = 1; break;
119 case 'f': is_forced = 1; break;
120 case 'i': index = 1; break;
121 case 'I': index_fname = optarg; break;
122 case 'g': rebgzip = 1; break;
123 case 'r': reindex = 1; compress = 0; break;
124 case '@': threads = atoi(optarg); break;
125 case 1:
126 printf(
127 "bgzip (htslib) %s\n"
128 "Copyright (C) 2017 Genome Research Ltd.\n", hts_version());
129 return EXIT_SUCCESS;
130 case 'h':
131 case '?': return bgzip_main_usage();
134 if (size >= 0) end = start + size;
135 if (end >= 0 && end < start) {
136 fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
137 return 1;
139 if (compress == 1) {
140 struct stat sbuf;
141 int f_src = fileno(stdin);
143 if ( argc>optind )
145 if ( stat(argv[optind],&sbuf)<0 )
147 fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
148 return 1;
151 if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
152 fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
153 return 1;
156 if (pstdout)
157 fp = bgzf_open("-", "w");
158 else
160 char *name = malloc(strlen(argv[optind]) + 5);
161 strcpy(name, argv[optind]);
162 strcat(name, ".gz");
163 fp = bgzf_open(name, is_forced? "w" : "wx");
164 if (fp == NULL && errno == EEXIST && confirm_overwrite(name))
165 fp = bgzf_open(name, "w");
166 if (fp == NULL) {
167 fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
168 free(name);
169 return 1;
171 free(name);
174 else if (!pstdout && isatty(fileno((FILE *)stdout)) )
175 return bgzip_main_usage();
176 else if ( index && !index_fname )
178 fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n");
179 return 1;
181 else
182 fp = bgzf_open("-", "w");
184 if ( index && rebgzip )
186 fprintf(stderr, "[bgzip] Can't produce a index and rebgzip simultaneously\n");
187 return 1;
190 if ( rebgzip && !index_fname )
192 fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n");
193 return 1;
196 if (threads > 1)
197 bgzf_mt(fp, threads, 256);
199 if ( index ) bgzf_index_build_init(fp);
200 buffer = malloc(WINDOW_SIZE);
201 if (rebgzip){
202 if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);
204 while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
205 if (bgzf_block_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode);
207 else {
208 while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
209 if (bgzf_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode);
211 if ( index )
213 if (index_fname) {
214 if (bgzf_index_dump(fp, index_fname, NULL) < 0)
215 error("Could not write index to '%s'\n", index_fname);
216 } else {
217 if (bgzf_index_dump(fp, argv[optind], ".gz.gzi") < 0)
218 error("Could not write index to '%s.gz.gzi'", argv[optind]);
221 if (bgzf_close(fp) < 0) error("Close failed: Error %d", fp->errcode);
222 if (argc > optind && !pstdout) unlink(argv[optind]);
223 free(buffer);
224 close(f_src);
225 return 0;
227 else if ( reindex )
229 if ( argc>optind )
231 fp = bgzf_open(argv[optind], "r");
232 if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]);
234 else
236 if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n");
237 fp = bgzf_open("-", "r");
238 if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno));
241 buffer = malloc(BGZF_BLOCK_SIZE);
242 bgzf_index_build_init(fp);
243 int ret;
244 while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ;
245 free(buffer);
246 if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n");
248 if ( index_fname ) {
249 if (bgzf_index_dump(fp, index_fname, NULL) < 0)
250 error("Could not write index to '%s'\n", index_fname);
251 } else {
252 if (bgzf_index_dump(fp, argv[optind], ".gzi") < 0)
253 error("Could not write index to '%s.gzi'\n", argv[optind]);
256 if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode);
257 return 0;
259 else
261 struct stat sbuf;
262 int f_dst;
264 if ( argc>optind )
266 if ( stat(argv[optind],&sbuf)<0 )
268 fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
269 return 1;
271 char *name;
272 int len = strlen(argv[optind]);
273 if ( strcmp(argv[optind]+len-3,".gz") )
275 fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
276 return 1;
278 fp = bgzf_open(argv[optind], "r");
279 if (fp == NULL) {
280 fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]);
281 return 1;
284 if (pstdout) {
285 f_dst = fileno(stdout);
287 else {
288 const int wrflags = O_WRONLY | O_CREAT | O_TRUNC;
289 name = strdup(argv[optind]);
290 name[strlen(name) - 3] = '\0';
291 f_dst = open(name, is_forced? wrflags : wrflags|O_EXCL, 0666);
292 if (f_dst < 0 && errno == EEXIST && confirm_overwrite(name))
293 f_dst = open(name, wrflags, 0666);
294 if (f_dst < 0) {
295 fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
296 free(name);
297 return 1;
299 free(name);
302 else if (!pstdout && isatty(fileno((FILE *)stdin)) )
303 return bgzip_main_usage();
304 else
306 f_dst = fileno(stdout);
307 fp = bgzf_open("-", "r");
308 if (fp == NULL) {
309 fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
310 return 1;
313 if (threads > 1)
314 bgzf_mt(fp, threads, 256);
316 buffer = malloc(WINDOW_SIZE);
317 if ( start>0 )
319 if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);
320 if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\n", start);
322 while (1) {
323 if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
324 else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
325 if (c == 0) break;
326 if (c < 0) error("Could not read %d bytes: Error %d\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode);
327 start += c;
328 if ( write(f_dst, buffer, c) != c ) error("Could not write %d bytes\n", c);
329 if (end >= 0 && start >= end) break;
331 free(buffer);
332 if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode);
333 if (!pstdout) unlink(argv[optind]);
334 return 0;