2 Copyright (c) 2012-2014 Genome Research Ltd.
3 Author: James Bonfield <jkb@sanger.ac.uk>
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16 Institute nor the names of its contributors may be used to endorse or promote
17 products derived from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * Include cram.h instead.
34 * This is an internal part of the CRAM system and is automatically included
35 * when you #include cram.h.
37 * Implements the low level CRAM I/O primitives.
38 * This includes basic data types such as byte, int, ITF-8,
39 * maps, bitwise I/O, etc.
46 #include <cram/misc.h>
52 /**@{ ----------------------------------------------------------------------
53 * ITF8 encoding and decoding.
55 * Also see the itf8_get and itf8_put macros.
/*! INTERNAL: Converts two characters into an integer for use in switch{}.
 *
 * The first character occupies bits 8 and up, the second the low byte,
 * e.g. CRAM_KEY('B','A') == 0x4241.
 */
#define CRAM_KEY(a,b) (((a)<<8)|((b)))
61 /*! Reads an integer in ITF-8 encoding from 'fd' and stores it in
65 * Returns the number of bytes read on success;
68 int itf8_decode(cram_fd
*fd
, int32_t *val
);
/*
 * Decodes one ITF-8 encoded integer from memory.
 *
 * The count of leading 1 bits in the first byte selects the encoded length
 * (1 to 5 bytes); the remaining bits of the first byte are the most
 * significant bits of the value.  In the 5-byte form only the low 4 bits of
 * the last byte are used.
 *
 * cp     First byte of the encoded value.  No bounds checking is done: up
 *        to 5 bytes are read depending on the first byte.
 * val_p  Receives the decoded value.
 *
 * Returns the number of bytes consumed (1 to 5).
 */
static inline int itf8_get(char *cp, int32_t *val_p) {
    const unsigned char *up = (const unsigned char *)cp;
    /* Work in uint32_t so that shifts never reach the sign bit of int. */
    const uint32_t b0 = up[0];

    if (b0 < 0x80) {
        *val_p = (int32_t)b0;
        return 1;
    }

    if (b0 < 0xc0) {
        *val_p = (int32_t)(((b0 << 8) | up[1]) & 0x3fff);
        return 2;
    }

    if (b0 < 0xe0) {
        *val_p = (int32_t)(((b0 << 16) | ((uint32_t)up[1] << 8) | up[2])
                           & 0x1fffff);
        return 3;
    }

    if (b0 < 0xf0) {
        *val_p = (int32_t)(((b0 << 24) | ((uint32_t)up[1] << 16) |
                            ((uint32_t)up[2] << 8) | up[3]) & 0x0fffffff);
        return 4;
    }

    {
        uint32_t uv = ((b0 & 0x0f) << 28) | ((uint32_t)up[1] << 20) |
                      ((uint32_t)up[2] << 12) | ((uint32_t)up[3] << 4) |
                      (up[4] & 0x0f);
        /* Map the top half of the unsigned range onto negative values
         * without relying on an out-of-range unsigned-to-signed cast. */
        *val_p = uv < 0x80000000UL
            ? (int32_t)uv
            : -((int32_t)(0xffffffffUL - uv)) - 1;
        return 5;
    }
}
/*
 * Stores a value to memory in ITF-8 format.
 *
 * cp   Destination buffer; must have room for up to 5 bytes.
 * val  Value to encode.  Negative values always use the 5-byte form.
 *
 * Returns the number of bytes required to store the number.
 * This is a maximum of 5 bytes.
 */
static inline int itf8_put(char *cp, int32_t val) {
    unsigned char *up = (unsigned char *)cp;
    /* Encode from the unsigned bit pattern so no negative value is shifted. */
    const uint32_t u = (uint32_t)val;

    if (!(u & ~0x0000007fU)) {            /* 1 byte */
        up[0] = (unsigned char)u;
        return 1;
    }

    if (!(u & ~0x00003fffU)) {            /* 2 byte */
        up[0] = (unsigned char)((u >> 8) | 0x80);
        up[1] = (unsigned char)(u & 0xff);
        return 2;
    }

    if (!(u & ~0x001fffffU)) {            /* 3 byte */
        up[0] = (unsigned char)((u >> 16) | 0xc0);
        up[1] = (unsigned char)((u >> 8) & 0xff);
        up[2] = (unsigned char)(u & 0xff);
        return 3;
    }

    if (!(u & ~0x0fffffffU)) {            /* 4 byte */
        up[0] = (unsigned char)((u >> 24) | 0xe0);
        up[1] = (unsigned char)((u >> 16) & 0xff);
        up[2] = (unsigned char)((u >> 8) & 0xff);
        up[3] = (unsigned char)(u & 0xff);
        return 4;
    }

    /* 5 byte: the last byte carries only the low 4 bits of the value. */
    up[0] = (unsigned char)(0xf0 | (u >> 28));
    up[1] = (unsigned char)((u >> 20) & 0xff);
    up[2] = (unsigned char)((u >> 12) & 0xff);
    up[3] = (unsigned char)((u >> 4) & 0xff);
    up[4] = (unsigned char)(u & 0x0f);
    return 5;
}
/*
 * 64-bit itf8 variant: stores a value to memory in LTF-8 format.
 *
 * An n-byte encoding (n = 1..8) starts with n-1 leading 1 bits followed by
 * a 0 bit and holds 7*n value bits in total.  Anything wider is written as
 * a 0xff marker byte followed by the 8 value bytes, most significant first.
 *
 * cp   Destination buffer; must have room for up to 9 bytes.
 * val  Value to encode.  Negative values always use the 9-byte form.
 *
 * Returns the number of bytes written (1 to 9).
 */
static inline int ltf8_put(char *cp, int64_t val) {
    unsigned char *up = (unsigned char *)cp;
    /* Encode from the unsigned bit pattern so no negative value is shifted. */
    const uint64_t u = (uint64_t)val;
    int nbytes = 1;
    int shift;

    /* Smallest length whose 7*nbytes payload bits can hold the value. */
    while (nbytes < 9 && (u >> (7 * nbytes)) != 0)
        nbytes++;

    if (nbytes == 9) {
        *up++ = 0xff;
    } else {
        /* Length prefix: 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe. */
        const unsigned prefix = (0xff00u >> (nbytes - 1)) & 0xff;
        *up++ = (unsigned char)(prefix | (u >> (8 * (nbytes - 1))));
    }

    /* Remaining bytes, most significant first. */
    for (shift = 8 * (nbytes - 2); shift >= 0; shift -= 8)
        *up++ = (unsigned char)((u >> shift) & 0xff);

    return nbytes;
}
/*
 * Decodes one LTF-8 (64-bit ITF-8 variant) integer from memory.
 *
 * The count of leading 1 bits in the first byte gives the number of bytes
 * that follow it (0 to 8).  Bits of the first byte below the terminating 0
 * bit are the most significant bits of the value; for the 8- and 9-byte
 * forms the first byte carries no value bits.
 *
 * cp     First byte of the encoded value.  No bounds checking is done: up
 *        to 9 bytes are read depending on the first byte.
 * val_p  Receives the decoded value.
 *
 * Returns the number of bytes consumed (1 to 9).
 */
static inline int ltf8_get(char *cp, int64_t *val_p) {
    const unsigned char *up = (const unsigned char *)cp;
    const unsigned b0 = up[0];
    unsigned marker = 0x80;
    int nbytes = 1;
    uint64_t v;
    int i;

    /* One extra byte per leading 1 bit, capped at 9 bytes for 0xff. */
    while (nbytes < 9 && (b0 & marker)) {
        nbytes++;
        marker >>= 1;
    }

    /* Payload bits held in the first byte (none once nbytes >= 8). */
    v = nbytes >= 8 ? 0 : (b0 & (0xffu >> nbytes));

    for (i = 1; i < nbytes; i++)
        v = (v << 8) | up[i];

    *val_p = (int64_t)v;
    return nbytes;
}
/*
 * Number of bytes (1 to 5) needed to hold v in ITF-8 format.
 *
 * NOTE: v is evaluated up to four times, so do not pass an expression with
 * side effects.  Negative values report 5.
 */
#define itf8_size(v) ((!((v)&~0x7f))?1:(!((v)&~0x3fff))?2:(!((v)&~0x1fffff))?3:(!((v)&~0xfffffff))?4:5)
267 /* Version of itf8_get that checks it hasn't run out of input */
269 extern const int itf8_bytes
[16];
270 extern const int ltf8_bytes
[256];
272 static inline int safe_itf8_get(const char *cp
, const char *endp
,
274 const unsigned char *up
= (unsigned char *)cp
;
277 (cp
>= endp
|| endp
- cp
< itf8_bytes
[up
[0]>>4])) {
285 } else if (up
[0] < 0xc0) {
286 *val_p
= ((up
[0] <<8) | up
[1]) & 0x3fff;
288 } else if (up
[0] < 0xe0) {
289 *val_p
= ((up
[0]<<16) | (up
[1]<< 8) | up
[2]) & 0x1fffff;
291 } else if (up
[0] < 0xf0) {
292 *val_p
= (((uint32_t)up
[0]<<24) | (up
[1]<<16) | (up
[2]<<8) | up
[3]) & 0x0fffffff;
295 uint32_t uv
= (((uint32_t)up
[0] & 0x0f)<<28) | (up
[1]<<20) | (up
[2]<<12) | (up
[3]<<4) | (up
[4] & 0x0f);
296 *val_p
= uv
< 0x80000000UL
? uv
: -((int32_t) (0xffffffffUL
- uv
)) - 1;
301 static inline int safe_ltf8_get(const char *cp
, const char *endp
,
303 unsigned char *up
= (unsigned char *)cp
;
306 (cp
>= endp
|| endp
- cp
< ltf8_bytes
[up
[0]])) return 0;
311 } else if (up
[0] < 0xc0) {
312 *val_p
= (((uint64_t)up
[0]<< 8) |
313 (uint64_t)up
[1]) & (((1LL<<(6+8)))-1);
315 } else if (up
[0] < 0xe0) {
316 *val_p
= (((uint64_t)up
[0]<<16) |
317 ((uint64_t)up
[1]<< 8) |
318 (uint64_t)up
[2]) & ((1LL<<(5+2*8))-1);
320 } else if (up
[0] < 0xf0) {
321 *val_p
= (((uint64_t)up
[0]<<24) |
322 ((uint64_t)up
[1]<<16) |
323 ((uint64_t)up
[2]<< 8) |
324 (uint64_t)up
[3]) & ((1LL<<(4+3*8))-1);
326 } else if (up
[0] < 0xf8) {
327 *val_p
= (((uint64_t)up
[0]<<32) |
328 ((uint64_t)up
[1]<<24) |
329 ((uint64_t)up
[2]<<16) |
330 ((uint64_t)up
[3]<< 8) |
331 (uint64_t)up
[4]) & ((1LL<<(3+4*8))-1);
333 } else if (up
[0] < 0xfc) {
334 *val_p
= (((uint64_t)up
[0]<<40) |
335 ((uint64_t)up
[1]<<32) |
336 ((uint64_t)up
[2]<<24) |
337 ((uint64_t)up
[3]<<16) |
338 ((uint64_t)up
[4]<< 8) |
339 (uint64_t)up
[5]) & ((1LL<<(2+5*8))-1);
341 } else if (up
[0] < 0xfe) {
342 *val_p
= (((uint64_t)up
[0]<<48) |
343 ((uint64_t)up
[1]<<40) |
344 ((uint64_t)up
[2]<<32) |
345 ((uint64_t)up
[3]<<24) |
346 ((uint64_t)up
[4]<<16) |
347 ((uint64_t)up
[5]<< 8) |
348 (uint64_t)up
[6]) & ((1LL<<(1+6*8))-1);
350 } else if (up
[0] < 0xff) {
351 *val_p
= (((uint64_t)up
[1]<<48) |
352 ((uint64_t)up
[2]<<40) |
353 ((uint64_t)up
[3]<<32) |
354 ((uint64_t)up
[4]<<24) |
355 ((uint64_t)up
[5]<<16) |
356 ((uint64_t)up
[6]<< 8) |
357 (uint64_t)up
[7]) & ((1LL<<(7*8))-1);
360 *val_p
= (((uint64_t)up
[1]<<56) |
361 ((uint64_t)up
[2]<<48) |
362 ((uint64_t)up
[3]<<40) |
363 ((uint64_t)up
[4]<<32) |
364 ((uint64_t)up
[5]<<24) |
365 ((uint64_t)up
[6]<<16) |
366 ((uint64_t)up
[7]<< 8) |
372 /*! Pushes a value in ITF8 format onto the end of a block.
374 * This shouldn't be used for high-volume data as it is not the fastest
378 * Returns the number of bytes written
380 int itf8_put_blk(cram_block
*blk
, int val
);
382 /*! Pulls a literal 32-bit value from a block.
384 * @returns the number of bytes decoded;
387 int int32_get_blk(cram_block
*b
, int32_t *val
);
389 /*! Pushes a literal 32-bit value onto the end of a block.
392 * Returns 0 on success;
395 int int32_put_blk(cram_block
*blk
, int32_t val
);
399 /**@{ ----------------------------------------------------------------------
400 * CRAM blocks - the dynamically growable data block. We have code to
401 * create, update, (un)compress and read/write.
403 * These are derived from the deflate_interlaced.c blocks, but with the
404 * CRAM extension of content types and IDs.
407 /*! Allocates a new cram_block structure with a specified content_type and
411 * Returns block pointer on success;
414 cram_block
*cram_new_block(enum cram_content_type content_type
,
417 /*! Reads a block from a cram file.
420 * Returns cram_block pointer on success;
423 cram_block
*cram_read_block(cram_fd
*fd
);
425 /*! Writes a CRAM block.
428 * Returns 0 on success;
431 int cram_write_block(cram_fd
*fd
, cram_block
*b
);
433 /*! Frees a CRAM block, deallocating internal data too.
435 void cram_free_block(cram_block
*b
);
437 /*! Uncompress a memory block using Zlib.
440 * Returns 0 on success;
443 char *zlib_mem_inflate(char *cdata
, size_t csize
, size_t *size
);
445 /*! Uncompresses a CRAM block, if compressed.
448 * Returns 0 on success;
451 int cram_uncompress_block(cram_block
*b
);
453 /*! Compresses a block.
455 * Compresses a block using one of two different zlib strategies. If we only
456 * want one choice set strat2 to be -1.
458 * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED
459 * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is
460 * significantly faster.
463 * Returns 0 on success;
466 int cram_compress_block(cram_fd
*fd
, cram_block
*b
, cram_metrics
*metrics
,
467 int method
, int level
);
469 cram_metrics
*cram_new_metrics(void);
470 char *cram_block_method2str(enum cram_block_method m
);
471 char *cram_content_type2str(enum cram_content_type t
);
474 * Find an external block by its content_id
477 static inline cram_block
*cram_get_block_by_id(cram_slice
*slice
, int id
) {
478 if (slice
->block_by_id
&& id
>= 0 && id
< 1024) {
479 return slice
->block_by_id
[id
];
482 for (i
= 0; i
< slice
->hdr
->num_blocks
; i
++) {
483 cram_block
*b
= slice
->block
[i
];
484 if (b
&& b
->content_type
== EXTERNAL
&& b
->content_id
== id
)
/* --- Accessor macros for manipulating blocks on a byte by byte basis --- */

/*
 * All of these take a pointer 'b' to a structure with 'data', 'byte' (bytes
 * in use) and 'alloc' (bytes allocated) members, and evaluate 'b' more than
 * once.
 *
 * NOTE(review): the result of realloc() is stored unchecked, so on
 * allocation failure the old buffer is leaked and (b)->data becomes NULL.
 * The macros have no way to report an error to the caller.
 */

/* Block size and data pointer. */
#define BLOCK_SIZE(b) ((b)->byte)
#define BLOCK_DATA(b) ((b)->data)

/* Returns the address one past the end of the block */
#define BLOCK_END(b) (&(b)->data[(b)->byte])

/*
 * Request block to be at least 'l' bytes long.
 *
 * Grows by a factor of 1.5, starting from 1024.  An allocation of 1 byte
 * (possible after BLOCK_RESIZE_EXACT) also jumps to 1024, because 1.5 * 1
 * truncates back to 1 and the loop would otherwise never terminate.
 */
#define BLOCK_RESIZE(b,l)                                           \
    do {                                                            \
        while ((b)->alloc <= (l)) {                                 \
            (b)->alloc = (b)->alloc > 1                             \
                ? (b)->alloc + (b)->alloc / 2                       \
                : 1024;                                             \
            (b)->data = realloc((b)->data, (b)->alloc);             \
        }                                                           \
    } while (0)

/* Make block exactly 'l' bytes long */
#define BLOCK_RESIZE_EXACT(b,l)                                     \
    do {                                                            \
        (b)->alloc = (l);                                           \
        (b)->data = realloc((b)->data, (b)->alloc);                 \
    } while (0)

/* Ensure the block can hold at least another 'l' bytes */
#define BLOCK_GROW(b,l) BLOCK_RESIZE((b), BLOCK_SIZE((b)) + (l))

/* Append string 's' of length 'l' */
#define BLOCK_APPEND(b,s,l)                                         \
    do {                                                            \
        BLOCK_GROW((b),(l));                                        \
        memcpy(BLOCK_END((b)), (s), (l));                           \
        BLOCK_SIZE((b)) += (l);                                     \
    } while (0)

/* Append as single character 'c' */
#define BLOCK_APPEND_CHAR(b,c)                                      \
    do {                                                            \
        BLOCK_GROW((b),1);                                          \
        (b)->data[(b)->byte++] = (c);                               \
    } while (0)

/* Append a single unsigned integer */
#define BLOCK_APPEND_UINT(b,i)                                      \
    do {                                                            \
        unsigned char *cp;                                          \
        /* 11 bytes covers the 10 digits of a 32-bit value. */      \
        BLOCK_GROW((b),11);                                         \
        cp = &(b)->data[(b)->byte];                                 \
        (b)->byte += append_uint32(cp, (i)) - cp;                   \
    } while (0)
/*
 * Writes 'i' as unsigned decimal text at 'cp', with no leading zeros and no
 * terminating NUL.  Zero is written as the single digit '0'.
 *
 * cp  Destination; must have room for up to 10 bytes.
 * i   Value to format.
 *
 * Returns a pointer to one past the last digit written.
 */
static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i) {
    unsigned char digits[10];   /* UINT32_MAX has 10 decimal digits */
    int n = 0;

    /* Collect digits least significant first ... */
    do {
        digits[n++] = (unsigned char)('0' + i % 10);
        i /= 10;
    } while (i != 0);

    /* ... and emit them in reading order. */
    while (n > 0)
        *cp++ = digits[--n];

    return cp;
}
/*
 * Writes 'i' as exactly 9 decimal digits at 'cp', zero padded on the left,
 * with no terminating NUL.
 *
 * cp  Destination; must have room for 9 bytes.
 * i   Value to format.  It is expected to be below 1000000000; larger
 *     values make the first character fall outside '0'..'9'.
 *
 * Returns a pointer to one past the last digit written (cp + 9).
 */
static inline unsigned char *append_sub32(unsigned char *cp, uint32_t i) {
    uint32_t div;

    for (div = 100000000; div != 0; div /= 10) {
        *cp++ = (unsigned char)(i / div + '0');
        i %= div;
    }

    return cp;
}
/*
 * Writes 'i' as unsigned decimal text at 'cp', with no leading zeros and no
 * terminating NUL.
 *
 * Values that fit in 32 bits go straight to append_uint32().  Larger values
 * are split into base-1000000000 groups: the leading group is written
 * without padding and each following group as 9 zero-padded digits.
 *
 * cp  Destination; must have room for up to 20 bytes.
 * i   Value to format.
 *
 * Returns a pointer to one past the last digit written.
 */
static inline unsigned char *append_uint64(unsigned char *cp, uint64_t i) {
    uint64_t hi;

    if (i <= 0xffffffff)
        return append_uint32(cp, (uint32_t)i);

    hi = i / 1000000000;
    if (hi > 1000000000) {
        /* Three groups: leading digits, then two 9-digit groups. */
        cp = append_uint32(cp, (uint32_t)(hi / 1000000000));
        cp = append_sub32(cp, (uint32_t)(hi % 1000000000));
    } else {
        /* Two groups: hi is at most 1000000000 here, so it fits 32 bits. */
        cp = append_uint32(cp, (uint32_t)hi);
    }

    return append_sub32(cp, (uint32_t)(i % 1000000000));
}
/*
 * Sets both comp_size and uncomp_size of block 'b' to its current size in
 * bytes.  The expansion is parenthesised so it can be used safely inside a
 * larger expression.
 */
#define BLOCK_UPLEN(b) \
    ((b)->comp_size = (b)->uncomp_size = BLOCK_SIZE((b)))
617 /**@{ ----------------------------------------------------------------------
618 * Reference sequence handling
621 /*! Loads a reference set from fn and stores in the cram_fd.
624 * Returns 0 on success;
627 int cram_load_reference(cram_fd
*fd
, char *fn
);
629 /*! Generates a lookup table in refs based on the SQ headers in SAM_hdr.
631 * Indexes references by the order they appear in a BAM file. This may not
632 * necessarily be the same order they appear in the fasta reference file.
635 * Returns 0 on success;
638 int refs2id(refs_t
*r
, SAM_hdr
*bfd
);
640 void refs_free(refs_t
*r
);
642 /*! Returns a portion of a reference sequence from start to end inclusive.
644 * The returned pointer is owned by the cram_file fd and should not be freed
645 * by the caller. It is valid only until the next cram_get_ref is called
646 * with the same fd parameter (so is thread-safe if given multiple files).
648 * To return the entire reference sequence, specify start as 1 and end
652 * Returns reference on success;
655 char *cram_get_ref(cram_fd
*fd
, int id
, int start
, int end
);
656 void cram_ref_incr(refs_t
*r
, int id
);
657 void cram_ref_decr(refs_t
*r
, int id
);
659 /**@{ ----------------------------------------------------------------------
663 /*! Creates a new container, specifying the maximum number of slices
664 * and records permitted.
667 * Returns cram_container ptr on success;
670 cram_container
*cram_new_container(int nrec
, int nslice
);
671 void cram_free_container(cram_container
*c
);
673 /*! Reads a container header.
676 * Returns cram_container on success;
677 * NULL on failure or no container left (fd->err == 0).
679 cram_container
*cram_read_container(cram_fd
*fd
);
681 /*! Writes a container structure.
684 * Returns 0 on success;
687 int cram_write_container(cram_fd
*fd
, cram_container
*h
);
689 /*! Flushes a container to disk.
691 * Flushes a completely or partially full container to disk, writing
692 * container structure, header and blocks. This also calls the encoder
696 * Returns 0 on success;
699 int cram_flush_container(cram_fd
*fd
, cram_container
*c
);
700 int cram_flush_container_mt(cram_fd
*fd
, cram_container
*c
);
704 /**@{ ----------------------------------------------------------------------
705 * Compression headers; the first part of the container
708 /*! Creates a new blank container compression header
711 * Returns header ptr on success;
714 cram_block_compression_hdr
*cram_new_compression_header(void);
716 /*! Frees a cram_block_compression_hdr */
717 void cram_free_compression_header(cram_block_compression_hdr
*hdr
);
721 /**@{ ----------------------------------------------------------------------
722 * Slices and slice headers
725 /*! Frees a slice header */
726 void cram_free_slice_header(cram_block_slice_hdr
*hdr
);
729 void cram_free_slice(cram_slice
*s
);
731 /*! Creates a new empty slice in memory, for subsequent writing to
735 * Returns cram_slice ptr on success;
738 cram_slice
*cram_new_slice(enum cram_content_type type
, int nrecs
);
740 /*! Loads an entire slice.
742 * FIXME: In 1.0 the native unit of slices within CRAM is broken
743 * as slices contain references to objects in other slices.
744 * To work around this while keeping the slice oriented outer loop
745 * we read all slices and stitch them together into a fake large
749 * Returns cram_slice ptr on success;
752 cram_slice
*cram_read_slice(cram_fd
*fd
);
757 /**@{ ----------------------------------------------------------------------
758 * CRAM file definition (header)
761 /*! Reads a CRAM file definition structure.
764 * Returns file_def ptr on success;
767 cram_file_def
*cram_read_file_def(cram_fd
*fd
);
769 /*! Writes a cram_file_def structure to cram_fd.
772 * Returns 0 on success;
775 int cram_write_file_def(cram_fd
*fd
, cram_file_def
*def
);
777 /*! Frees a cram_file_def structure. */
778 void cram_free_file_def(cram_file_def
*def
);
782 /**@{ ----------------------------------------------------------------------
786 /*! Reads the SAM header from the first CRAM data block.
788 * Also performs minimal parsing to extract read-group
789 * and sample information.
792 * Returns SAM hdr ptr on success;
795 SAM_hdr
*cram_read_SAM_hdr(cram_fd
*fd
);
797 /*! Writes a CRAM SAM header.
800 * Returns 0 on success;
803 int cram_write_SAM_hdr(cram_fd
*fd
, SAM_hdr
*hdr
);
807 /**@{ ----------------------------------------------------------------------
808 * The top-level cram opening, closing and option handling
811 /*! Opens a CRAM file for read (mode "rb") or write ("wb").
813 * The filename may be "-" to indicate stdin or stdout.
816 * Returns file handle on success;
819 cram_fd
*cram_open(const char *filename
, const char *mode
);
821 /*! Opens an existing stream for reading or writing.
824 * Returns file handle on success;
827 cram_fd
*cram_dopen(struct hFILE
*fp
, const char *filename
, const char *mode
);
829 /*! Closes a CRAM file.
832 * Returns 0 on success;
835 int cram_close(cram_fd
*fd
);
838 * Seek within a CRAM file.
840 * Returns 0 on success
843 int cram_seek(cram_fd
*fd
, off_t offset
, int whence
);
846 * Flushes a CRAM file.
847 * Useful for when writing to stdout without wishing to close the stream.
849 * Returns 0 on success
852 int cram_flush(cram_fd
*fd
);
854 /*! Checks for end of file on a cram_fd stream.
857 * Returns 0 if not at end of file
858 * 1 if we hit an expected EOF (end of range or EOF block)
859 * 2 for other EOF (end of stream without EOF block)
861 int cram_eof(cram_fd
*fd
);
863 /*! Sets options on the cram_fd.
865 * See CRAM_OPT_* definitions in cram_structs.h.
866 * Use this immediately after opening.
869 * Returns 0 on success;
872 int cram_set_option(cram_fd
*fd
, enum hts_fmt_option opt
, ...);
874 /*! Sets options on the cram_fd.
876 * See CRAM_OPT_* definitions in cram_structs.h.
877 * Use this immediately after opening.
880 * Returns 0 on success;
883 int cram_set_voption(cram_fd
*fd
, enum hts_fmt_option opt
, va_list args
);
886 * Attaches a header to a cram_fd.
888 * This should be used when creating a new cram_fd for writing where
889 * we have an SAM_hdr already constructed (eg from a file we've read
893 * Returns 0 on success;
896 int cram_set_header(cram_fd
*fd
, SAM_hdr
*hdr
);
899 * Returns the hFILE connected to a cram_fd.
901 static inline struct hFILE
*cram_hfile(cram_fd
*fd
) {
909 #endif /* _CRAM_IO_H_ */