1 /* bgzip.c -- Block compression/decompression utility.
3 Copyright (C) 2008, 2009 Broad Institute / Massachusetts Institute of Technology
4 Copyright (C) 2010, 2013-2017 Genome Research Ltd.
6 Permission is hereby granted, free of charge, to any person obtaining a copy
7 of this software and associated documentation files (the "Software"), to deal
8 in the Software without restriction, including without limitation the rights
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 copies of the Software, and to permit persons to whom the Software is
11 furnished to do so, subject to the following conditions:
13 The above copyright notices and this permission notice shall be included in
14 all copies or substantial portions of the Software.
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
36 #include "htslib/bgzf.h"
37 #include "htslib/hts.h"
/* Size in bytes (64 KiB) of the I/O buffer: main() allocates a buffer of
 * this size and reads/writes at most this many bytes per iteration. */
39 static const int WINDOW_SIZE
= 64 * 1024;
/* Report a problem on stderr.
 *
 * format: printf-style format string; the variadic arguments supply the
 *         values for its conversion specifiers.
 *
 * Only the vfprintf() call is visible in this extract.
 * NOTE(review): main() calls this for conditions it does not recover from,
 * so it presumably terminates the process after printing -- confirm against
 * the complete file. */
41 static void error(const char *format
, ...)
/* Forward the caller's format and argument list to stderr unchanged. */
45 vfprintf(stderr
, format
, ap
);
/* Ask the user interactively whether the existing file `fn` may be
 * overwritten.
 *
 * fn: name of the file that already exists (only used in the prompt).
 *
 * The answer is accepted only when stdin is a terminal; `ret` is set to 1
 * when the first character typed is 'y' or 'Y'.
 * NOTE(review): the initial value of `ret`, the final return and the use of
 * `save_errno` are not visible in this extract -- presumably errno is
 * restored so callers can still report the original EEXIST; confirm. */
50 static int confirm_overwrite(const char *fn
)
/* Remember errno as it was on entry, before any call below can change it. */
52 int save_errno
= errno
;
/* Never prompt when stdin is not a terminal (e.g. data is piped in). */
55 if (isatty(STDIN_FILENO
)) {
57 fprintf(stderr
, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn
);
/* A single character is read; anything other than y/Y leaves ret unchanged. */
58 if (scanf("%c", &c
) == 1 && (c
== 'Y' || c
== 'y')) ret
= 1;
/* Print the bgzip version (from hts_version()) and the command-line usage
 * summary to stderr.
 *
 * main() returns this function's result directly as its own return value.
 * NOTE(review): the return statement itself is not visible in this extract. */
65 static int bgzip_main_usage(void)
67 fprintf(stderr
, "\n");
68 fprintf(stderr
, "Version: %s\n", hts_version());
69 fprintf(stderr
, "Usage: bgzip [OPTIONS] [FILE] ...\n");
70 fprintf(stderr
, "Options:\n");
/* One line per option; keep in sync with the option table in main(). */
71 fprintf(stderr
, " -b, --offset INT decompress at virtual file pointer (0-based uncompressed offset)\n");
72 fprintf(stderr
, " -c, --stdout write on standard output, keep original files unchanged\n");
73 fprintf(stderr
, " -d, --decompress decompress\n");
74 fprintf(stderr
, " -f, --force overwrite files without asking\n");
75 fprintf(stderr
, " -h, --help give this help\n");
76 fprintf(stderr
, " -i, --index compress and create BGZF index\n");
77 fprintf(stderr
, " -I, --index-name FILE name of BGZF index file [file.gz.gzi]\n");
78 fprintf(stderr
, " -r, --reindex (re)index compressed file\n");
79 fprintf(stderr
, " -g, --rebgzip use an index file to bgzip a file\n");
80 fprintf(stderr
, " -s, --size INT decompress INT bytes (uncompressed size)\n");
81 fprintf(stderr
, " -@, --threads INT number of compression threads to use [1]\n");
82 fprintf(stderr
, "\n");
/* Entry point of the bgzip utility.
 *
 * Parses the command line (see the option table below), then runs one of
 * three modes selected by the flags:
 *   - compress (default): read the input with read() and write it through
 *     the BGZF writer, optionally building an index (-i) or re-using an
 *     existing index to place block boundaries (-g);
 *   - reindex (-r): read an existing BGZF file and dump a fresh index;
 *   - decompress (-d, or implied by -b): read through the BGZF reader and
 *     write the bytes with write(), optionally restricted to the region
 *     given by -b / -s.
 * Input comes from argv[optind] when a file name is given, otherwise from
 * stdin; output goes to stdout when -c is set or no file name is given.
 *
 * NOTE(review): this extract omits many lines (braces, the switch header,
 * most return statements and the declarations of fp, buffer, threads, sbuf,
 * ret, name and f_dst), so the section comments below describe only what the
 * visible statements do. */
86 int main(int argc
, char **argv
)
/* Mode flags; compress/pstdout/is_forced get their defaults further down. */
88 int c
, compress
, pstdout
, is_forced
, index
= 0, rebgzip
= 0, reindex
= 0;
/* Requested region in uncompressed bytes: [start, end), or size from start. */
91 long start
, end
, size
;
/* Explicit index file name from -I; NULL means "derive it from the input". */
92 char *index_fname
= NULL
;
/* Long-option table; each entry maps to the short option handled below.
 * --version has no short form and is reported as the value 1. */
95 static const struct option loptions
[] =
97 {"help", no_argument
, NULL
, 'h'},
98 {"offset", required_argument
, NULL
, 'b'},
99 {"stdout", no_argument
, NULL
, 'c'},
100 {"decompress", no_argument
, NULL
, 'd'},
101 {"force", no_argument
, NULL
, 'f'},
102 {"index", no_argument
, NULL
, 'i'},
103 {"index-name", required_argument
, NULL
, 'I'},
104 {"reindex", no_argument
, NULL
, 'r'},
105 {"rebgzip",no_argument
,NULL
,'g'},
106 {"size", required_argument
, NULL
, 's'},
107 {"threads", required_argument
, NULL
, '@'},
108 {"version", no_argument
, NULL
, 1},
/* Defaults: compress, write next to the input, whole file, ask before
 * overwriting. size/end of -1 mean "not specified". */
112 compress
= 1; pstdout
= 0; start
= 0; size
= -1; end
= -1; is_forced
= 0;
113 while((c
= getopt_long(argc
, argv
, "cdh?fb:@:s:iI:gr",loptions
,NULL
)) >= 0){
115 case 'd': compress
= 0; break;
116 case 'c': pstdout
= 1; break;
/* -b implies decompression to stdout starting at the given offset. */
117 case 'b': start
= atol(optarg
); compress
= 0; pstdout
= 1; break;
/* -s forces output to stdout but does not itself select decompression. */
118 case 's': size
= atol(optarg
); pstdout
= 1; break;
119 case 'f': is_forced
= 1; break;
120 case 'i': index
= 1; break;
121 case 'I': index_fname
= optarg
; break;
122 case 'g': rebgzip
= 1; break;
/* -r clears compress so the reindex path is taken instead. */
123 case 'r': reindex
= 1; compress
= 0; break;
124 case '@': threads
= atoi(optarg
); break;
/* Version banner (the call that prints it is not visible in this extract). */
127 "bgzip (htslib) %s\n"
128 "Copyright (C) 2017 Genome Research Ltd.\n", hts_version());
131 case '?': return bgzip_main_usage();
/* Turn -s into an end offset and reject regions that end before they start. */
134 if (size
>= 0) end
= start
+ size
;
135 if (end
>= 0 && end
< start
) {
136 fprintf(stderr
, "[bgzip] Illegal region: [%ld, %ld]\n", start
, end
);
/* ---- Compression: source descriptor defaults to stdin. ---- */
141 int f_src
= fileno(stdin
);
/* A named input must exist and be openable for reading. */
145 if ( stat(argv
[optind
],&sbuf
)<0 )
147 fprintf(stderr
, "[bgzip] %s: %s\n", strerror(errno
), argv
[optind
]);
151 if ((f_src
= open(argv
[optind
], O_RDONLY
)) < 0) {
152 fprintf(stderr
, "[bgzip] %s: %s\n", strerror(errno
), argv
[optind
]);
/* Compressed output to stdout ("-"). */
157 fp
= bgzf_open("-", "w");
/* Output file name: input name plus room for a suffix and the NUL
 * (the statement appending the suffix is not visible in this extract). */
160 char *name
= malloc(strlen(argv
[optind
]) + 5);
161 strcpy(name
, argv
[optind
]);
/* "wx" refuses to clobber an existing file unless -f was given; on EEXIST
 * the user is asked and, if they agree, the file is reopened with "w". */
163 fp
= bgzf_open(name
, is_forced
? "w" : "wx");
164 if (fp
== NULL
&& errno
== EEXIST
&& confirm_overwrite(name
))
165 fp
= bgzf_open(name
, "w");
167 fprintf(stderr
, "[bgzip] can't create %s: %s\n", name
, strerror(errno
));
/* No input file: refuse to write compressed data to a terminal unless -c
 * was given, and require -I when an index is requested. */
174 else if (!pstdout
&& isatty(fileno((FILE *)stdout
)) )
175 return bgzip_main_usage();
176 else if ( index
&& !index_fname
)
178 fprintf(stderr
, "[bgzip] Index file name expected when writing to stdout\n");
182 fp
= bgzf_open("-", "w");
/* -i and -g are mutually exclusive, and -g needs an index name (-I). */
184 if ( index
&& rebgzip
)
186 fprintf(stderr
, "[bgzip] Can't produce a index and rebgzip simultaneously\n");
190 if ( rebgzip
&& !index_fname
)
192 fprintf(stderr
, "[bgzip] Index file name expected when writing to stdout\n");
197 bgzf_mt(fp
, threads
, 256);
199 if ( index
) bgzf_index_build_init(fp
);
200 buffer
= malloc(WINDOW_SIZE
);
/* Rebgzip: load the index named by -I.  The message reports index_fname,
 * the file actually loaded; argv[optind] is NULL when reading from stdin. */
202 if ( bgzf_index_load(fp
, index_fname
, NULL
) < 0 ) error("Could not load index: %s\n", index_fname
);
/* Copy loop using bgzf_block_write() (follows the index load above). */
204 while ((c
= read(f_src
, buffer
, WINDOW_SIZE
)) > 0)
205 if (bgzf_block_write(fp
, buffer
, c
) < 0) error("Could not write %d bytes: Error %d\n", c
, fp
->errcode
);
/* Copy loop using plain bgzf_write(). */
208 while ((c
= read(f_src
, buffer
, WINDOW_SIZE
)) > 0)
209 if (bgzf_write(fp
, buffer
, c
) < 0) error("Could not write %d bytes: Error %d\n", c
, fp
->errcode
);
/* Write the index either to the -I name or to "<input>.gz.gzi". */
214 if (bgzf_index_dump(fp
, index_fname
, NULL
) < 0)
215 error("Could not write index to '%s'\n", index_fname
);
217 if (bgzf_index_dump(fp
, argv
[optind
], ".gz.gzi") < 0)
218 error("Could not write index to '%s.gz.gzi'", argv
[optind
]);
221 if (bgzf_close(fp
) < 0) error("Close failed: Error %d", fp
->errcode
);
/* The original input file is removed once it has been compressed to a file. */
222 if (argc
> optind
&& !pstdout
) unlink(argv
[optind
]);
/* ---- Reindex: open the BGZF input (file or stdin) for reading. ---- */
231 fp
= bgzf_open(argv
[optind
], "r");
232 if ( !fp
) error("[bgzip] Could not open file: %s\n", argv
[optind
]);
/* With stdin there is no input name to derive the index name from. */
236 if ( !index_fname
) error("[bgzip] Index file name expected when reading from stdin\n");
237 fp
= bgzf_open("-", "r");
238 if ( !fp
) error("[bgzip] Could not read from stdin: %s\n", strerror(errno
));
241 buffer
= malloc(BGZF_BLOCK_SIZE
);
242 bgzf_index_build_init(fp
);
/* Read the whole stream and discard the data; only the side effect of
 * building the index is wanted. */
244 while ( (ret
=bgzf_read(fp
, buffer
, BGZF_BLOCK_SIZE
))>0 ) ;
246 if ( ret
<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n");
/* Write the index either to the -I name or to "<input>.gzi". */
249 if (bgzf_index_dump(fp
, index_fname
, NULL
) < 0)
250 error("Could not write index to '%s'\n", index_fname
);
252 if (bgzf_index_dump(fp
, argv
[optind
], ".gzi") < 0)
253 error("Could not write index to '%s.gzi'\n", argv
[optind
]);
256 if ( bgzf_close(fp
)<0 ) error("Close failed: Error %d\n",fp
->errcode
);
/* ---- Decompression: a named input must exist ... ---- */
266 if ( stat(argv
[optind
],&sbuf
)<0 )
268 fprintf(stderr
, "[bgzip] %s: %s\n", strerror(errno
), argv
[optind
]);
/* ... and end in ".gz".  Names shorter than the suffix are rejected before
 * the pointer arithmetic, which would otherwise point before the string. */
272 int len
= strlen(argv
[optind
]);
273 if ( len < 3 || strcmp(argv
[optind
]+len
-3,".gz") )
275 fprintf(stderr
, "[bgzip] %s: unknown suffix -- ignored\n", argv
[optind
]);
278 fp
= bgzf_open(argv
[optind
], "r");
280 fprintf(stderr
, "[bgzip] Could not open file: %s\n", argv
[optind
]);
/* Decompressed output to stdout. */
285 f_dst
= fileno(stdout
);
/* Output file: the input name with its last three characters (".gz")
 * removed.  O_EXCL protects an existing file unless -f was given; on
 * EEXIST the user is asked before the file is truncated. */
288 const int wrflags
= O_WRONLY
| O_CREAT
| O_TRUNC
;
289 name
= strdup(argv
[optind
]);
290 name
[strlen(name
) - 3] = '\0';
291 f_dst
= open(name
, is_forced
? wrflags
: wrflags
|O_EXCL
, 0666);
292 if (f_dst
< 0 && errno
== EEXIST
&& confirm_overwrite(name
))
293 f_dst
= open(name
, wrflags
, 0666);
295 fprintf(stderr
, "[bgzip] can't create %s: %s\n", name
, strerror(errno
));
/* No input file: refuse to read compressed data from a terminal unless
 * stdout output was requested; otherwise decompress stdin to stdout. */
302 else if (!pstdout
&& isatty(fileno((FILE *)stdin
)) )
303 return bgzip_main_usage();
306 f_dst
= fileno(stdout
);
307 fp
= bgzf_open("-", "r");
309 fprintf(stderr
, "[bgzip] Could not read from stdin: %s\n", strerror(errno
));
314 bgzf_mt(fp
, threads
, 256);
316 buffer
= malloc(WINDOW_SIZE
);
/* Seeking to an uncompressed offset needs the "<input>.gzi" index.
 * start is a long, so it is printed with %ld. */
319 if ( bgzf_index_load(fp
, argv
[optind
], ".gzi") < 0 ) error("Could not load index: %s.gzi\n", argv
[optind
]);
320 if ( bgzf_useek(fp
, start
, SEEK_SET
) < 0 ) error("Could not seek to %ld-th (uncompressd) byte\n", start
);
/* Read at most one window, or only what remains of the requested region. */
323 if (end
< 0) c
= bgzf_read(fp
, buffer
, WINDOW_SIZE
);
324 else c
= bgzf_read(fp
, buffer
, (end
- start
> WINDOW_SIZE
)? WINDOW_SIZE
:(end
- start
));
/* The requested byte count has type long (int mixed with long), hence %ld. */
326 if (c
< 0) error("Could not read %ld bytes: Error %d\n", (end
- start
> WINDOW_SIZE
)? WINDOW_SIZE
:(end
- start
), fp
->errcode
);
328 if ( write(f_dst
, buffer
, c
) != c
) error("Could not write %d bytes\n", c
);
/* Stop once the requested region has been fully written. */
329 if (end
>= 0 && start
>= end
) break;
332 if (bgzf_close(fp
) < 0) error("Close failed: Error %d\n",fp
->errcode
);
/* The compressed input is removed once it has been decompressed to a file. */
333 if (!pstdout
) unlink(argv
[optind
]);