modified: myjupyterlab.sh
[GalaxyCodeBases.git] / c_cpp / lib / htslib / cram / cram_structs.h
blob5165605b6d85d6d3d2de02d7b9067d3f99f38a91
1 /*
2 Copyright (c) 2012-2013 Genome Research Ltd.
3 Author: James Bonfield <jkb@sanger.ac.uk>
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16 Institute nor the names of its contributors may be used to endorse or promote
17 products derived from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #ifndef _CRAM_STRUCTS_H_
32 #define _CRAM_STRUCTS_H_
35 * Defines in-memory structs for the basic file-format objects in the
36 * CRAM format.
38 * The basic file format is:
39 * File-def SAM-hdr Container Container ...
41 * Container:
42 * Service-block data-block data-block ...
44 * Multiple blocks in a container are grouped together as slices,
45 * also sometimes referred to as landmarks in the spec.
49 #include <pthread.h>
50 #include <stdint.h>
51 #include <sys/types.h>
53 #include "htslib/thread_pool.h"
54 #include "cram/string_alloc.h"
55 #include "cram/mFILE.h"
56 #include "htslib/khash.h"
58 #ifdef __cplusplus
59 extern "C" {
60 #endif
62 // Generic hash-map integer -> integer
63 KHASH_MAP_INIT_INT(m_i2i, int)
65 // Generic hash-set integer -> (existance)
66 KHASH_SET_INIT_INT(s_i2i)
68 // For brevity
69 typedef unsigned char uc;
72 * A union for the preservation map. Required for khash.
74 typedef union {
75 int i;
76 char *p;
77 } pmap_t;
79 // Generates static functions here which isn't ideal, but we have no way
80 // currently to declare the kh_map_t structure here without also declaring a
81 // duplicate in the .c files due to the nature of the KHASH macros.
82 KHASH_MAP_INIT_STR(map, pmap_t)
84 struct hFILE;
86 #define SEQS_PER_SLICE 10000
87 #define BASES_PER_SLICE (SEQS_PER_SLICE*500)
88 #define SLICE_PER_CNT 1
90 #define CRAM_SUBST_MATRIX "CGTNAGTNACTNACGNACGT"
92 #define MAX_STAT_VAL 1024
93 //#define MAX_STAT_VAL 16
94 typedef struct cram_stats {
95 int freqs[MAX_STAT_VAL];
96 khash_t(m_i2i) *h;
97 int nsamp; // total number of values added
98 int nvals; // total number of unique values added
99 } cram_stats;
101 /* NB: matches java impl, not the spec */
102 enum cram_encoding {
103 E_NULL = 0,
104 E_EXTERNAL = 1,
105 E_GOLOMB = 2,
106 E_HUFFMAN = 3,
107 E_BYTE_ARRAY_LEN = 4,
108 E_BYTE_ARRAY_STOP = 5,
109 E_BETA = 6,
110 E_SUBEXP = 7,
111 E_GOLOMB_RICE = 8,
112 E_GAMMA = 9,
113 E_NUM_CODECS = 10, /* Number of codecs, not a real one. */
116 enum cram_external_type {
117 E_INT = 1,
118 E_LONG = 2,
119 E_BYTE = 3,
120 E_BYTE_ARRAY = 4,
121 E_BYTE_ARRAY_BLOCK = 5,
124 /* External IDs used by this implementation (only assumed during writing) */
125 enum cram_DS_ID {
126 DS_CORE = 0,
127 DS_aux = 1, // aux_blk
128 DS_aux_OQ = 2,
129 DS_aux_BQ = 3,
130 DS_aux_BD = 4,
131 DS_aux_BI = 5,
132 DS_aux_FZ = 6, // also ZM:B
133 DS_aux_oq = 7, // other qualities
134 DS_aux_os = 8, // other sequences
135 DS_aux_oz = 9, // other strings
136 DS_ref,
137 DS_RN, // name_blk
138 DS_QS, // qual_blk
139 DS_IN, // base_blk
140 DS_SC, // soft_blk
142 DS_BF, // start loop
143 DS_CF,
144 DS_AP,
145 DS_RG,
146 DS_MQ,
147 DS_NS,
148 DS_MF,
149 DS_TS,
150 DS_NP,
151 DS_NF,
152 DS_RL,
153 DS_FN,
154 DS_FC,
155 DS_FP,
156 DS_DL,
157 DS_BA,
158 DS_BS,
159 DS_TL,
160 DS_RI,
161 DS_RS,
162 DS_PD,
163 DS_HC,
164 DS_BB,
165 DS_QQ,
167 DS_TN, // end loop
169 DS_RN_len,
170 DS_SC_len,
171 DS_BB_len,
172 DS_QQ_len,
174 DS_TC, // CRAM v1.0 tags
175 DS_TM, // test
176 DS_TV, // test
178 DS_END,
181 /* "File Definition Structure" */
182 typedef struct cram_file_def {
183 char magic[4];
184 uint8_t major_version;
185 uint8_t minor_version;
186 char file_id[20]; // Filename or SHA1 checksum
187 } cram_file_def;
189 #define CRAM_MAJOR_VERS(v) ((v) >> 8)
190 #define CRAM_MINOR_VERS(v) ((v) & 0xff)
192 struct cram_slice;
194 enum cram_block_method {
195 ERROR = -1,
196 RAW = 0,
197 GZIP = 1,
198 BZIP2 = 2,
199 LZMA = 3,
200 RANS = 4, // Generic; either order
201 RANS0 = 4,
202 RANS1 = 10, // Not externalised; stored as RANS (generic)
203 GZIP_RLE = 11, // NB: not externalised in CRAM
206 enum cram_content_type {
207 CT_ERROR = -1,
208 FILE_HEADER = 0,
209 COMPRESSION_HEADER = 1,
210 MAPPED_SLICE = 2,
211 UNMAPPED_SLICE = 3, // CRAM V1.0 only
212 EXTERNAL = 4,
213 CORE = 5,
216 /* Compression metrics */
217 typedef struct {
218 // number of trials and time to next trial
219 int trial;
220 int next_trial;
222 // aggregate sizes during trials
223 int sz_gz_rle;
224 int sz_gz_def;
225 int sz_rans0;
226 int sz_rans1;
227 int sz_bzip2;
228 int sz_lzma;
230 // resultant method from trials
231 int method;
232 int strat;
234 // Revisions of method, to allow culling of continually failing ones.
235 int gz_rle_cnt;
236 int gz_def_cnt;
237 int rans0_cnt;
238 int rans1_cnt;
239 int bzip2_cnt;
240 int lzma_cnt;
241 int revised_method;
243 double gz_rle_extra;
244 double gz_def_extra;
245 double rans0_extra;
246 double rans1_extra;
247 double bzip2_extra;
248 double lzma_extra;
249 } cram_metrics;
251 // Hash aux key (XX:i) to cram_metrics
252 KHASH_MAP_INIT_INT(m_metrics, cram_metrics*)
255 /* Block */
256 typedef struct cram_block {
257 enum cram_block_method method, orig_method;
258 enum cram_content_type content_type;
259 int32_t content_id;
260 int32_t comp_size;
261 int32_t uncomp_size;
262 uint32_t crc32;
263 int32_t idx; /* offset into data */
264 unsigned char *data;
266 // For bit I/O
267 size_t alloc;
268 size_t byte;
269 int bit;
271 // To aid compression
272 cram_metrics *m; // used to track aux block compression only
273 } cram_block;
275 struct cram_codec; /* defined in cram_codecs.h */
276 struct cram_map;
278 #define CRAM_MAP_HASH 32
279 #define CRAM_MAP(a,b) (((a)*3+(b))&(CRAM_MAP_HASH-1))
281 /* Compression header block */
282 typedef struct cram_block_compression_hdr {
283 int32_t ref_seq_id;
284 int32_t ref_seq_start;
285 int32_t ref_seq_span;
286 int32_t num_records;
287 int32_t num_landmarks;
288 int32_t *landmark;
290 /* Flags from preservation map */
291 int mapped_qs_included;
292 int unmapped_qs_included;
293 int unmapped_placed;
294 int qs_included;
295 int read_names_included;
296 int AP_delta;
297 // indexed by ref-base and subst. code
298 char substitution_matrix[5][4];
300 // TD Dictionary as a concatenated block
301 cram_block *TD_blk; // Tag Dictionary
302 int nTL; // number of TL entries in TD
303 unsigned char **TL; // array of size nTL, pointer into TD_blk.
304 khash_t(m_s2i) *TD_hash; // Keyed on TD strings, map to TL[] indices
305 string_alloc_t *TD_keys; // Pooled keys for TD hash.
307 khash_t(map) *preservation_map;
308 struct cram_map *rec_encoding_map[CRAM_MAP_HASH];
309 struct cram_map *tag_encoding_map[CRAM_MAP_HASH];
311 struct cram_codec *codecs[DS_END];
313 char *uncomp; // A single block of uncompressed data
314 size_t uncomp_size, uncomp_alloc;
316 unsigned int data_series; // See cram_fields enum below
317 } cram_block_compression_hdr;
319 typedef struct cram_map {
320 int key; /* 0xe0 + 3 bytes */
321 enum cram_encoding encoding;
322 int offset; /* Offset into a single block of memory */
323 int size; /* Size */
324 struct cram_codec *codec;
325 struct cram_map *next; // for noddy internal hash
326 } cram_map;
328 typedef struct cram_tag_map {
329 struct cram_codec *codec;
330 cram_block *blk;
331 cram_metrics *m;
332 } cram_tag_map;
334 // Hash aux key (XX:i) to cram_tag_map
335 KHASH_MAP_INIT_INT(m_tagmap, cram_tag_map*)
337 /* Mapped or unmapped slice header block */
338 typedef struct cram_block_slice_hdr {
339 enum cram_content_type content_type;
340 int32_t ref_seq_id; /* if content_type == MAPPED_SLICE */
341 int32_t ref_seq_start; /* if content_type == MAPPED_SLICE */
342 int32_t ref_seq_span; /* if content_type == MAPPED_SLICE */
343 int32_t num_records;
344 int64_t record_counter;
345 int32_t num_blocks;
346 int32_t num_content_ids;
347 int32_t *block_content_ids;
348 int32_t ref_base_id; /* if content_type == MAPPED_SLICE */
349 unsigned char md5[16];
350 } cram_block_slice_hdr;
352 struct ref_entry;
355 * Container.
357 * Conceptually a container is split into slices, and slices into blocks.
358 * However on disk it's just a list of blocks and we need to query the
359 * block types to identify the start/end points of the slices.
361 * OR... are landmarks the start/end points of slices?
363 typedef struct cram_container {
364 int32_t length;
365 int32_t ref_seq_id;
366 int32_t ref_seq_start;
367 int32_t ref_seq_span;
368 int64_t record_counter;
369 int64_t num_bases;
370 int32_t num_records;
371 int32_t num_blocks;
372 int32_t num_landmarks;
373 int32_t *landmark;
375 /* Size of container header above */
376 size_t offset;
378 /* Compression header is always the first block? */
379 cram_block_compression_hdr *comp_hdr;
380 cram_block *comp_hdr_block;
382 /* For construction purposes */
383 int max_slice, curr_slice; // maximum number of slices
384 int max_rec, curr_rec; // current and max recs per slice
385 int max_c_rec, curr_c_rec; // current and max recs per container
386 int slice_rec; // rec no. for start of this slice
387 int curr_ref; // current ref ID. -2 for no previous
388 int last_pos; // last record position
389 struct cram_slice **slices, *slice;
390 int pos_sorted; // boolean, 1=>position sorted data
391 int max_apos; // maximum position, used if pos_sorted==0
392 int last_slice; // number of reads in last slice (0 for 1st)
393 int multi_seq; // true if packing multi seqs per cont/slice
394 int unsorted; // true is AP_delta is 0.
396 /* Copied from fd before encoding, to allow multi-threading */
397 int ref_start, first_base, last_base, ref_id, ref_end;
398 char *ref;
399 //struct ref_entry *ref;
401 /* For multi-threading */
402 bam_seq_t **bams;
404 /* Statistics for encoding */
405 cram_stats *stats[DS_END];
407 khash_t(m_tagmap) *tags_used; // set of tag types in use, for tag encoding map
408 int *refs_used; // array of frequency of ref seq IDs
410 uint32_t crc32; // CRC32
412 uint64_t s_num_bases; // number of bases in this slice
413 } cram_container;
416 * A single cram record
418 typedef struct cram_record {
419 struct cram_slice *s; // Filled out by cram_decode only
421 int32_t ref_id; // fixed for all recs in slice?
422 int32_t flags; // BF
423 int32_t cram_flags; // CF
424 int32_t len; // RL
425 int32_t apos; // AP
426 int32_t rg; // RG
427 int32_t name; // RN; idx to s->names_blk
428 int32_t name_len;
429 int32_t mate_line; // index to another cram_record
430 int32_t mate_ref_id;
431 int32_t mate_pos; // NP
432 int32_t tlen; // TS
434 // Auxiliary data
435 int32_t ntags; // TC
436 int32_t aux; // idx to s->aux_blk
437 int32_t aux_size; // total size of packed ntags in aux_blk
438 #ifndef TN_external
439 int32_t TN_idx; // TN; idx to s->TN;
440 #else
441 int32_t tn; // idx to s->tn_blk
442 #endif
443 int TL;
445 int32_t seq; // idx to s->seqs_blk
446 int32_t qual; // idx to s->qual_blk
447 int32_t cigar; // idx to s->cigar
448 int32_t ncigar;
449 int32_t aend; // alignment end
450 int32_t mqual; // MQ
452 int32_t feature; // idx to s->feature
453 int32_t nfeature; // number of features
454 int32_t mate_flags; // MF
455 } cram_record;
457 // Accessor macros as an analogue of the bam ones
458 #define cram_qname(c) (&(c)->s->name_blk->data[(c)->name])
459 #define cram_seq(c) (&(c)->s->seqs_blk->data[(c)->seq])
460 #define cram_qual(c) (&(c)->s->qual_blk->data[(c)->qual])
461 #define cram_aux(c) (&(c)->s->aux_blk->data[(c)->aux])
462 #define cram_seqi(c,i) (cram_seq((c))[(i)])
463 #define cram_name_len(c) ((c)->name_len)
464 #define cram_strand(c) (((c)->flags & BAM_FREVERSE) != 0)
465 #define cram_mstrand(c) (((c)->flags & BAM_FMREVERSE) != 0)
466 #define cram_cigar(c) (&((cr)->s->cigar)[(c)->cigar])
469 * A feature is a base difference, used for the sequence reference encoding.
470 * (We generate these internally when writing CRAM.)
472 typedef struct cram_feature {
473 union {
474 struct {
475 int pos;
476 int code;
477 int base; // substitution code
478 } X;
479 struct {
480 int pos;
481 int code;
482 int base; // actual base & qual
483 int qual;
484 } B;
485 struct {
486 int pos;
487 int code;
488 int seq_idx; // index to s->seqs_blk
489 int len;
490 } b;
491 struct {
492 int pos;
493 int code;
494 int qual;
495 } Q;
496 struct {
497 int pos;
498 int code;
499 int len;
500 int seq_idx; // soft-clip multiple bases
501 } S;
502 struct {
503 int pos;
504 int code;
505 int len;
506 int seq_idx; // insertion multiple bases
507 } I;
508 struct {
509 int pos;
510 int code;
511 int base; // insertion single base
512 } i;
513 struct {
514 int pos;
515 int code;
516 int len;
517 } D;
518 struct {
519 int pos;
520 int code;
521 int len;
522 } N;
523 struct {
524 int pos;
525 int code;
526 int len;
527 } P;
528 struct {
529 int pos;
530 int code;
531 int len;
532 } H;
534 } cram_feature;
537 * A slice is really just a set of blocks, but it
538 * is the logical unit for decoding a number of
539 * sequences.
541 typedef struct cram_slice {
542 cram_block_slice_hdr *hdr;
543 cram_block *hdr_block;
544 cram_block **block;
545 cram_block **block_by_id;
547 /* State used during encoding/decoding */
548 int last_apos, max_apos;
550 /* Array of decoded cram records */
551 cram_record *crecs;
553 /* An dynamically growing buffers for data pointed
554 * to by crecs[] array.
556 uint32_t *cigar;
557 uint32_t cigar_alloc;
558 uint32_t ncigar;
560 cram_feature *features;
561 int nfeatures;
562 int afeatures; // allocated size of features
564 #ifndef TN_external
565 // TN field (Tag Name)
566 uint32_t *TN;
567 int nTN, aTN; // used and allocated size for TN[]
568 #else
569 cram_block *tn_blk;
570 int tn_id;
571 #endif
573 // For variable sized elements which are always external blocks.
574 cram_block *name_blk;
575 cram_block *seqs_blk;
576 cram_block *qual_blk;
577 cram_block *base_blk;
578 cram_block *soft_blk;
579 cram_block *aux_blk; // BAM aux block, created while decoding CRAM
581 string_alloc_t *pair_keys; // Pooled keys for pair hash.
582 khash_t(m_s2i) *pair[2]; // for identifying read-pairs in this slice.
584 char *ref; // slice of current reference
585 int ref_start; // start position of current reference;
586 int ref_end; // end position of current reference;
587 int ref_id;
589 // For going from BAM to CRAM; an array of auxiliary blocks per type
590 int naux_block;
591 cram_block **aux_block;
592 } cram_slice;
594 /*-----------------------------------------------------------------------------
595 * Consider moving reference handling to cram_refs.[ch]
597 // from fa.fai / samtools faidx files
598 typedef struct ref_entry {
599 char *name;
600 char *fn;
601 int64_t length;
602 int64_t offset;
603 int bases_per_line;
604 int line_length;
605 int64_t count; // for shared references so we know to dealloc seq
606 char *seq;
607 mFILE *mf;
608 int is_md5; // Reference comes from a raw seq found by MD5
609 } ref_entry;
611 KHASH_MAP_INIT_STR(refs, ref_entry*)
613 // References structure.
614 typedef struct {
615 string_alloc_t *pool; // String pool for holding filenames and SN vals
617 khash_t(refs) *h_meta; // ref_entry*, index by name
618 ref_entry **ref_id; // ref_entry*, index by ID
619 int nref; // number of ref_entry
621 char *fn; // current file opened
622 BGZF *fp; // and the hFILE* to go with it.
624 int count; // how many cram_fd sharing this refs struct
626 pthread_mutex_t lock; // Mutex for multi-threaded updating
627 ref_entry *last; // Last queried sequence
628 int last_id; // Used in cram_ref_decr_locked to delay free
629 } refs_t;
631 /*-----------------------------------------------------------------------------
632 * CRAM index
634 * Detect format by number of entries per line.
635 * 5 => 1.0 (refid, start, nseq, C offset, slice)
636 * 6 => 1.1 (refid, start, span, C offset, S offset, S size)
638 * Indices are stored in a nested containment list, which is trivial to set
639 * up as the indices are on sorted data so we're appending to the nclist
640 * in sorted order. Basically if a slice entirely fits within a previous
641 * slice then we append to that slices list. This is done recursively.
643 * Lists are sorted on two dimensions: ref id + slice coords.
645 typedef struct cram_index {
646 int nslice, nalloc; // total number of slices
647 struct cram_index *e; // array of size nslice
649 int refid; // 1.0 1.1
650 int start; // 1.0 1.1
651 int end; // 1.1
652 int nseq; // 1.0 - undocumented
653 int slice; // 1.0 landmark index, 1.1 landmark value
654 int len; // 1.1 - size of slice in bytes
655 int64_t offset; // 1.0 1.1
656 } cram_index;
658 typedef struct {
659 int refid;
660 int start;
661 int end;
662 } cram_range;
664 /*-----------------------------------------------------------------------------
666 /* CRAM File handle */
668 typedef struct spare_bams {
669 bam_seq_t **bams;
670 struct spare_bams *next;
671 } spare_bams;
673 typedef struct cram_fd {
674 struct hFILE *fp;
675 int mode; // 'r' or 'w'
676 int version;
677 cram_file_def *file_def;
678 SAM_hdr *header;
680 char *prefix;
681 int64_t record_counter;
682 int err;
684 // Most recent compression header decoded
685 //cram_block_compression_hdr *comp_hdr;
686 //cram_block_slice_hdr *slice_hdr;
688 // Current container being processed.
689 cram_container *ctr;
691 // positions for encoding or decoding
692 int first_base, last_base;
694 // cached reference portion
695 refs_t *refs; // ref meta-data structure
696 char *ref, *ref_free; // current portion held in memory
697 int ref_id;
698 int ref_start;
699 int ref_end;
700 char *ref_fn; // reference fasta filename
702 // compression level and metrics
703 int level;
704 cram_metrics *m[DS_END];
705 khash_t(m_metrics) *tags_used; // cram_metrics[], per tag types in use.
707 // options
708 int decode_md; // Whether to export MD and NM tags
709 int verbose;
710 int seqs_per_slice;
711 int bases_per_slice;
712 int slices_per_container;
713 int embed_ref;
714 int no_ref;
715 int ignore_md5;
716 int use_bz2;
717 int use_rans;
718 int use_lzma;
719 int shared_ref;
720 unsigned int required_fields;
721 cram_range range;
723 // lookup tables, stored here so we can be trivially multi-threaded
724 unsigned int bam_flag_swap[0x1000]; // cram -> bam flags
725 unsigned int cram_flag_swap[0x1000];// bam -> cram flags
726 unsigned char L1[256]; // ACGT{*} ->0123{4}
727 unsigned char L2[256]; // ACGTN{*}->01234{5}
728 char cram_sub_matrix[32][32]; // base substituion codes
730 int index_sz;
731 cram_index *index; // array, sizeof index_sz
732 off_t first_container;
733 int eof;
734 int last_slice; // number of recs encoded in last slice
735 int multi_seq;
736 int unsorted;
737 int empty_container; // Marker for EOF block
739 // thread pool
740 int own_pool;
741 hts_tpool *pool;
742 hts_tpool_process *rqueue;
743 pthread_mutex_t metrics_lock;
744 pthread_mutex_t ref_lock;
745 spare_bams *bl;
746 pthread_mutex_t bam_list_lock;
747 void *job_pending;
748 int ooc; // out of containers.
750 int lossy_read_names; // boolean
751 int tlen_approx; // max TLEN calculation offset.
752 int tlen_zero; // If true, permit tlen 0 (=> tlen calculated)
753 } cram_fd;
755 // Translation of required fields to cram data series
756 enum cram_fields {
757 CRAM_BF = 0x00000001,
758 CRAM_AP = 0x00000002,
759 CRAM_FP = 0x00000004,
760 CRAM_RL = 0x00000008,
761 CRAM_DL = 0x00000010,
762 CRAM_NF = 0x00000020,
763 CRAM_BA = 0x00000040,
764 CRAM_QS = 0x00000080,
765 CRAM_FC = 0x00000100,
766 CRAM_FN = 0x00000200,
767 CRAM_BS = 0x00000400,
768 CRAM_IN = 0x00000800,
769 CRAM_RG = 0x00001000,
770 CRAM_MQ = 0x00002000,
771 CRAM_TL = 0x00004000,
772 CRAM_RN = 0x00008000,
773 CRAM_NS = 0x00010000,
774 CRAM_NP = 0x00020000,
775 CRAM_TS = 0x00040000,
776 CRAM_MF = 0x00080000,
777 CRAM_CF = 0x00100000,
778 CRAM_RI = 0x00200000,
779 CRAM_RS = 0x00400000,
780 CRAM_PD = 0x00800000,
781 CRAM_HC = 0x01000000,
782 CRAM_SC = 0x02000000,
783 CRAM_BB = 0x04000000,
784 CRAM_BB_len = 0x08000000,
785 CRAM_QQ = 0x10000000,
786 CRAM_QQ_len = 0x20000000,
787 CRAM_aux= 0x40000000,
788 CRAM_ALL= 0x7fffffff,
791 // A CIGAR opcode, but not necessarily the implications of it. Eg FC/FP may
792 // encode a base difference, but we don't need to know what it is for CIGAR.
793 // If we have a soft-clip or insertion, we do need SC/IN though to know how
794 // long that array is.
795 #define CRAM_CIGAR (CRAM_FN | CRAM_FP | CRAM_FC | CRAM_DL | CRAM_IN | \
796 CRAM_SC | CRAM_HC | CRAM_PD | CRAM_RS | CRAM_RL | CRAM_BF)
798 #define CRAM_SEQ (CRAM_CIGAR | CRAM_BA | CRAM_BS | \
799 CRAM_RL | CRAM_AP | CRAM_BB)
801 #define CRAM_QUAL (CRAM_CIGAR | CRAM_RL | CRAM_AP | CRAM_QS | CRAM_QQ)
803 /* BF bitfields */
804 /* Corrected in 1.1. Use bam_flag_swap[bf] and BAM_* macros for 1.0 & 1.1 */
805 #define CRAM_FPAIRED 256
806 #define CRAM_FPROPER_PAIR 128
807 #define CRAM_FUNMAP 64
808 #define CRAM_FREVERSE 32
809 #define CRAM_FREAD1 16
810 #define CRAM_FREAD2 8
811 #define CRAM_FSECONDARY 4
812 #define CRAM_FQCFAIL 2
813 #define CRAM_FDUP 1
815 #define DS_aux_S "\001"
816 #define DS_aux_OQ_S "\002"
817 #define DS_aux_BQ_S "\003"
818 #define DS_aux_BD_S "\004"
819 #define DS_aux_BI_S "\005"
820 #define DS_aux_FZ_S "\006"
821 #define DS_aux_oq_S "\007"
822 #define DS_aux_os_S "\010"
823 #define DS_aux_oz_S "\011"
825 #define CRAM_M_REVERSE 1
826 #define CRAM_M_UNMAP 2
829 /* CF bitfields */
830 #define CRAM_FLAG_PRESERVE_QUAL_SCORES (1<<0)
831 #define CRAM_FLAG_DETACHED (1<<1)
832 #define CRAM_FLAG_MATE_DOWNSTREAM (1<<2)
833 #define CRAM_FLAG_NO_SEQ (1<<3)
834 #define CRAM_FLAG_MASK ((1<<4)-1)
836 /* Internal only */
837 #define CRAM_FLAG_STATS_ADDED (1<<30)
838 #define CRAM_FLAG_DISCARD_NAME (1<<31)
840 #ifdef __cplusplus
842 #endif
844 #endif /* _CRAM_STRUCTS_H_ */