3 #include <fcntl.h> // open, above 3
4 #include <unistd.h> // read, close
6 #include <string.h> // memcmp
7 #include <stdlib.h> //calloc
8 #include <stdio.h> // puts
9 #include <stdint.h> // uint_fast8_t
10 //#include <zlib.h> // already in gkseq.h
11 #include "gkseq.h" // No more `kseq.h` with tons of Macros !!!
15 //KSEQ_INIT(gzFile, gzread) // [kseq.h] Just like include, to inline some static inline functions.
30 ssize_t
read_kseq_no2bit(SeqFileObj
* const seqObj
) {
31 size_t seqlen
; // in fact size_t, but minus values are meanful.
32 int_fast8_t rvalue
= kseq_read(seqObj
->fobj
);
33 //fputs("<--->", stderr);
35 uint_fast8_t type
= rvalue
; // 1 or 3. No need to &3 now.
39 seqObj
->name
= kseq
->name
.s
;
40 if (! kseq
->comment
.l
) seqObj
->comment
= NULL
;
41 else seqObj
->comment
= kseq
->comment
.s
;
42 seqObj
->seq
= kseq
->seq
.s
;
43 NormalizeChrSeq(kseq
->seq
.s
); // to /[ATCGN]*/
44 if (rvalue
&2) { // withQ
47 seqObj
->qual
= kseq
->qual
.s
;
51 seqObj
->readlength
= seqlen
;
56 ssize_t
read_kseq_with2bit(SeqFileObj
* const seqObj
) {
57 size_t seqlen
; // in fact size_t, but minus values are meanful.
58 int_fast8_t rvalue
= kseq_read(seqObj
->fobj
);
60 uint_fast8_t type
= rvalue
; // 1 or 3. No need to &3 now.
64 seqObj
->name
= kseq
->name
.s
;
65 if (! kseq
->comment
.l
) seqObj
->comment
= NULL
;
66 else seqObj
->comment
= kseq
->comment
.s
;
67 seqObj
->seq
= kseq
->seq
.s
;
68 if (rvalue
&2) { // withQ
71 seqObj
->qual
= kseq
->qual
.s
;
75 size_t needtomallocQQW
= (seqlen
+31u)>>5; // 1 "QQWord" = 4 QWord = 32 bp. Well, I say there is QQW.
76 if (needtomallocQQW
> seqObj
->binMallocedQQWord
) {
77 KROUNDUP32(needtomallocQQW
);
78 seqObj
->binMallocedQQWord
= needtomallocQQW
;
79 seqObj
->diBseq
= realloc(seqObj
->diBseq
,needtomallocQQW
<<3); // 2^3=8
80 seqObj
->hexBQ
= realloc(seqObj
->hexBQ
,needtomallocQQW
<<5); // 4*2^3=32
82 seqObj
->binNcount
= base2dbit(seqlen
, kseq
->seq
.s
, seqObj
->qual
, seqObj
->diBseq
, seqObj
->hexBQ
);
83 // printf("-[%s]<%s><%zx>[%s]-\n",kseq->seq.s, qstr, seqObj->diBseq[0], unit2basechr(seqObj->diBseq[0]));
84 // Well, how to deal with smallcase masking ? Not using this information yet.
85 NormalizeChrSeq(kseq
->seq
.s
); // to /[ATCGN]*/
86 seqObj
->readlength
= seqlen
;
91 void close_kseq(SeqFileObj
* const seqObj
){
92 gzFile fp
= ((kseq_t
*)seqObj
->fobj
)->f
->f
;
94 kseq_destroy(seqObj
->fobj
);
97 // binmode: 1=base char, 2=base 2bit
98 SeqFileObj
* inSeqFinit(const char * const filename
, unsigned char binmode
) {
101 char FileID
[G_HEADER_LENGTH
];
102 fd
= open(filename
, O_RDONLY
);
103 if (fd
== -1) {warn("\n[x]Cannot open Sequence_file [%s]", filename
); return NULL
;}
104 fdstat
= read(fd
, FileID
, G_HEADER_LENGTH
);
105 if ( fdstat
!= G_HEADER_LENGTH
) {
106 warn("[!]Too short (%zd) for sequence_file [%s].", fdstat
, filename
);
111 warn("[!]ErrNo:[%zd] while closing file [%s].", fdstat
, filename
);
113 SeqFileObj
* const seqObj
= malloc(sizeof(SeqFileObj
)); // no more calloc(), just {.datePos[1]=0} is needed for kseq way.
114 //CANNOT use `&(SeqFileObj) {.datePos={0}};`, which is of a short lifetime !
116 if ( memcmp(G_TYPE_FAQC
, FileID
, G_HEADER_LENGTH
) ) { // Not G_TYPE_FAQC, thus to kseq.h
120 fp
= gzopen(filename
, "r");
121 if (! fp
) err(EXIT_FAILURE
, "\n[x]Cannot open with zlib for [%s]", filename
);
122 seq
= kseq_init(fp
); // calloc, thus safe
123 seqObj
->datePos
[0] = -1; // seeking not available here.
124 seqObj
->datePos
[1] = 0;
126 //seqObj->name = &seq->name.s;
127 //seqObj->comment = &seq->comment.s;
128 //seqObj->seq = &seq->seq.s;
129 //seqObj->qual = &seq->qual.s;
130 //seqObj->readlength = &seq->seq.l;
132 if (binmode
& GFIODIBBASE
) {
133 seqObj
->getNextSeq
= read_kseq_with2bit
; // (ssize_t (*)(void*))
135 seqObj
->getNextSeq
= read_kseq_no2bit
;
137 seqObj
->closefh
= close_kseq
;
138 seqObj
->diBseq
= NULL
; // We need NULL to free ...
139 seqObj
->hexBQ
= NULL
;
140 seqObj
->binMallocedQQWord
= 0;
141 seqObj
->binNcount
= 0;
142 //seqObj->readlength = 0;
143 //int seqlen; // TEST ONLY !
144 //seqlen = (*seqObj->getNextSeq)(seqObj->fh); //seqlen = kseq_read(seq); // TEST ONLY !
146 } else { // is G_TYPE_FAQC
147 errx(EXIT_FAILURE
, "\n[:(]FAQC file not supported now.");
154 int_fast8_t read_kseq_no2bit()
155 int_fast8_t read_kseq_with2bit()
156 int_fast8_t read_faqc_nobasechar()
157 int_fast8_t read_faqc_withbasechar()
160 ssize_t inSeqFreadNext(SeqFileObj * const seqObj) {
161 ssize_t seqlen; // in fact size_t, but minus values are meanful.
162 seqlen = (*seqObj->getNextSeq)(seqObj);
163 //seqObj->hasQ = (*seqObj->qual != 0);
167 void inSeqFdestroy(SeqFileObj
* const seqObj
) {
168 (*seqObj
->closefh
)(seqObj
);
169 free(seqObj
->diBseq
);