2 From Heng Li <lh3@sanger.ac.uk>, as in bwa package.
3 Macro translated by Hu Xuesong.
14 #define __GKSEQ_FILETYPE gzFile // Well, EP enough to use define here.
15 #define __GKSEQ_READFUNC gzread // Maybe "lh3" just want to make debug harder for his code ?
16 //#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
17 //#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
18 #define __GKSEQ_BUFSIZE 4096
21 #define KROUNDUP32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
24 //#74 "t.h" // append `KSEQ_INIT(gzFile, gzread)` to kseq.h, remove all `#include`, you will get "t.h".
26 #define KSTRING_T kstring_t
27 typedef struct __kstring_t
{
33 //#215 "t.h" // gcc -E t.h > gkseq.h && indent -kr -brf -l120 -nut t.h -o gkseqn.h
34 typedef struct __kstream_t
{
36 int begin
, end
, is_eof
;
41 kstring_t name
, comment
, seq
, qual
;
46 static inline kstream_t
*ks_init(__GKSEQ_FILETYPE f
) {
47 kstream_t
*ks
= (kstream_t
*) calloc(1, sizeof(kstream_t
));
49 ks
->buf
= (char *) malloc(__GKSEQ_BUFSIZE
);
53 static inline void ks_destroy(kstream_t
* ks
) {
60 static inline int ks_getc(kstream_t
* ks
) {
61 if (ks
->is_eof
&& ks
->begin
>= ks
->end
)
63 if (ks
->begin
>= ks
->end
) {
65 ks
->end
= __GKSEQ_READFUNC(ks
->f
, ks
->buf
, __GKSEQ_BUFSIZE
);
66 if (ks
->end
< __GKSEQ_BUFSIZE
)
71 return (int) ks
->buf
[ks
->begin
++];
74 static int ks_getuntil(kstream_t
* ks
, int delimiter
, kstring_t
* str
, int *dret
) {
78 if (ks
->begin
>= ks
->end
&& ks
->is_eof
)
82 if (ks
->begin
>= ks
->end
) {
85 ks
->end
= __GKSEQ_READFUNC(ks
->f
, ks
->buf
, __GKSEQ_BUFSIZE
);
86 if (ks
->end
< __GKSEQ_BUFSIZE
)
94 for (i
= ks
->begin
; i
< ks
->end
; ++i
)
95 if (ks
->buf
[i
] == delimiter
)
98 for (i
= ks
->begin
; i
< ks
->end
; ++i
)
99 if (isspace(ks
->buf
[i
]))
102 if (str
->m
- str
->l
< i
- ks
->begin
+ 1) {
103 str
->m
= str
->l
+ (i
- ks
->begin
) + 1;
105 str
->s
= (char *) realloc(str
->s
, str
->m
);
107 memcpy(str
->s
+ str
->l
, ks
->buf
+ ks
->begin
, i
- ks
->begin
);
108 str
->l
= str
->l
+ (i
- ks
->begin
);
116 str
->s
[str
->l
] = '\0';
120 static inline kseq_t
*kseq_init(__GKSEQ_FILETYPE fd
) {
121 kseq_t
*s
= (kseq_t
*) calloc(1, sizeof(kseq_t
));
123 /* realloc() may change the address,
124 thus, even we `malloc(32)` here and set `s->seq.m`,
125 `s->seq.s` still may be changed later on realloc().
130 static inline void kseq_rewind(kseq_t
* ks
) {
132 ks
->f
->is_eof
= ks
->f
->begin
= ks
->f
->end
= 0;
135 static inline void kseq_destroy(kseq_t
* ks
) {
137 free(ks
->name
.s
); free(ks
->comment
.s
); free(ks
->seq
.s
); free(ks
->qual
.s
);
143 // >=0 length of the sequence (normal)
145 3 FASTQ, both bases and Q
147 -2 truncated quality string
149 static int_fast8_t kseq_read(kseq_t
* seq
) { // better to be ssize_t
151 kstream_t
*ks
= seq
->f
;
152 if (seq
->last_char
== 0) { // then jump to the next header line
153 while ((c
= ks_getc(ks
)) != -1 && c
!= '>' && c
!= '@');
155 return -1; // end of file
157 } // the first header char has been read
158 seq
->comment
.l
= seq
->seq
.l
= seq
->qual
.l
= 0;
159 if (ks_getuntil(ks
, 0, &seq
->name
, &c
) < 0)
162 ks_getuntil(ks
, '\n', &seq
->comment
, 0);
163 while ((c
= ks_getc(ks
)) != -1 && c
!= '>' && c
!= '+' && c
!= '@') {
164 if (isgraph(c
)) { // printable non-space character
165 if (seq
->seq
.l
+ 1 >= seq
->seq
.m
) { // double the memory
166 seq
->seq
.m
= seq
->seq
.l
+ 2;
167 KROUNDUP32(seq
->seq
.m
); // rounded to next closest 2^k
168 seq
->seq
.s
= (char *) realloc(seq
->seq
.s
, seq
->seq
.m
);
170 seq
->seq
.s
[seq
->seq
.l
++] = (char) c
;
173 if (c
== '>' || c
== '@')
174 seq
->last_char
= c
; // the first header char has been read
175 seq
->seq
.s
[seq
->seq
.l
] = 0; // null terminated string
177 //seq->qual.s[0] = 0; // set Q to "". also bool, 0 for no Q. JUST TEMP, should change back later !!!
178 return 1; // FASTA, only bases
180 if (seq
->qual
.m
< seq
->seq
.m
) { // allocate enough memory
181 seq
->qual
.m
= seq
->seq
.m
;
182 seq
->qual
.s
= (char *) realloc(seq
->qual
.s
, seq
->qual
.m
);
184 while ((c
= ks_getc(ks
)) != -1 && c
!= '\n'); // skip the rest of '+' line
186 return -2; // we should not stop here
187 while ((c
= ks_getc(ks
)) != -1 && seq
->qual
.l
< seq
->seq
.l
)
188 if (c
>= 33 && c
<= 127)
189 seq
->qual
.s
[seq
->qual
.l
++] = (unsigned char) c
;
190 seq
->qual
.s
[seq
->qual
.l
] = 0; // null terminated string
191 seq
->last_char
= 0; // we have not come to the next header line
192 if (seq
->seq
.l
!= seq
->qual
.l
)
193 return -2; // qual string is shorter than seq string
195 return 3; // both base and Q