2 From Heng Li <lh3@sanger.ac.uk>, as in bwa package.
3 Macro translated by Hu Xuesong.
14 #define __GKSEQ_FILETYPE gzFile // Well, EP enough to use define here.
15 #define __GKSEQ_READFUNC gzread // Maybe "lh3" just want to make debug harder for his code ?
16 //#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
17 //#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
18 #define __GKSEQ_BUFSIZE 4096
21 #define KROUNDUP32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
25 #define KSTRING_T kstring_t
26 typedef struct __kstring_t
{
32 typedef struct __kstream_t
{
34 int begin
, end
, is_eof
;
39 kstring_t name
, comment
, seq
, qual
;
44 static inline kstream_t
*ks_init(__GKSEQ_FILETYPE f
) {
45 kstream_t
*ks
= (kstream_t
*) calloc(1, sizeof(kstream_t
));
47 ks
->buf
= (char *) malloc(__GKSEQ_BUFSIZE
);
51 static inline void ks_destroy(kstream_t
* ks
) {
58 static inline int ks_getc(kstream_t
* ks
) {
59 if (ks
->is_eof
&& ks
->begin
>= ks
->end
)
61 if (ks
->begin
>= ks
->end
) {
63 ks
->end
= __GKSEQ_READFUNC(ks
->f
, ks
->buf
, __GKSEQ_BUFSIZE
);
64 if (ks
->end
< __GKSEQ_BUFSIZE
)
69 return (int) ks
->buf
[ks
->begin
++];
72 static int ks_getuntil(kstream_t
* ks
, int delimiter
, kstring_t
* str
, int *dret
) {
76 if (ks
->begin
>= ks
->end
&& ks
->is_eof
)
80 if (ks
->begin
>= ks
->end
) {
83 ks
->end
= __GKSEQ_READFUNC(ks
->f
, ks
->buf
, __GKSEQ_BUFSIZE
);
84 if (ks
->end
< __GKSEQ_BUFSIZE
)
92 for (i
= ks
->begin
; i
< ks
->end
; ++i
)
93 if (ks
->buf
[i
] == delimiter
)
96 for (i
= ks
->begin
; i
< ks
->end
; ++i
)
97 if (isspace(ks
->buf
[i
]))
100 if (str
->m
- str
->l
< i
- ks
->begin
+ 1) {
101 str
->m
= str
->l
+ (i
- ks
->begin
) + 1;
103 str
->s
= (char *) realloc(str
->s
, str
->m
);
105 memcpy(str
->s
+ str
->l
, ks
->buf
+ ks
->begin
, i
- ks
->begin
);
106 str
->l
= str
->l
+ (i
- ks
->begin
);
114 str
->s
[str
->l
] = '\0';
118 static inline kseq_t
*kseq_init(__GKSEQ_FILETYPE fd
) {
119 kseq_t
*s
= (kseq_t
*) calloc(1, sizeof(kseq_t
));
124 static inline void kseq_rewind(kseq_t
* ks
) {
126 ks
->f
->is_eof
= ks
->f
->begin
= ks
->f
->end
= 0;
129 static inline void kseq_destroy(kseq_t
* ks
) {
131 free(ks
->name
.s
); free(ks
->comment
.s
); free(ks
->seq
.s
); free(ks
->qual
.s
);
137 // >=0 length of the sequence (normal)
139 3 FASTQ, both bases and Q
141 -2 truncated quality string
143 static int_fast8_t kseq_read(kseq_t
* seq
) {
145 kstream_t
*ks
= seq
->f
;
146 if (seq
->last_char
== 0) { // then jump to the next header line
147 while ((c
= ks_getc(ks
)) != -1 && c
!= '>' && c
!= '@');
149 return -1; // end of file
151 } // the first header char has been read
152 seq
->comment
.l
= seq
->seq
.l
= seq
->qual
.l
= 0;
153 if (ks_getuntil(ks
, 0, &seq
->name
, &c
) < 0)
156 ks_getuntil(ks
, '\n', &seq
->comment
, 0);
157 while ((c
= ks_getc(ks
)) != -1 && c
!= '>' && c
!= '+' && c
!= '@') {
158 if (isgraph(c
)) { // printable non-space character
159 if (seq
->seq
.l
+ 1 >= seq
->seq
.m
) { // double the memory
160 seq
->seq
.m
= seq
->seq
.l
+ 2;
161 KROUNDUP32(seq
->seq
.m
); // rounded to next closest 2^k
162 seq
->seq
.s
= (char *) realloc(seq
->seq
.s
, seq
->seq
.m
);
164 seq
->seq
.s
[seq
->seq
.l
++] = (char) c
;
167 if (c
== '>' || c
== '@')
168 seq
->last_char
= c
; // the first header char has been read
169 seq
->seq
.s
[seq
->seq
.l
] = 0; // null terminated string
171 return 1; // FASTA, only bases
173 if (seq
->qual
.m
< seq
->seq
.m
) { // allocate enough memory
174 seq
->qual
.m
= seq
->seq
.m
;
175 seq
->qual
.s
= (char *) realloc(seq
->qual
.s
, seq
->qual
.m
);
177 while ((c
= ks_getc(ks
)) != -1 && c
!= '\n'); // skip the rest of '+' line
179 return -2; // we should not stop here
180 while ((c
= ks_getc(ks
)) != -1 && seq
->qual
.l
< seq
->seq
.l
)
181 if (c
>= 33 && c
<= 127)
182 seq
->qual
.s
[seq
->qual
.l
++] = (unsigned char) c
;
183 seq
->qual
.s
[seq
->qual
.l
] = 0; // null terminated string
184 seq
->last_char
= 0; // we have not come to the next header line
185 if (seq
->seq
.l
!= seq
->qual
.l
)
186 return -2; // qual string is shorter than seq string
188 return 3; // both base and Q