3 #include <fcntl.h> // open, above 3
4 #include <unistd.h> // read, close
6 #include <string.h> // memcmp
7 #include <stdlib.h> //calloc
8 #include <stdio.h> // puts
9 #include <stdint.h> // uint_fast8_t
10 //#include <zlib.h> // already in gkseq.h
11 #include "gkseq.h" // No more `kseq.h` with tons of Macros !!!
16 ssize_t
read_kseq_no2bit(SeqFileObj
* const seqObj
) {
17 size_t seqlen
; // in fact size_t, but minus values are meanful.
18 int_fast8_t rvalue
= kseq_read(seqObj
->fobj
);
20 uint_fast8_t type
= rvalue
; // 1 or 3. No need to &3 now.
24 seqObj
->name
= kseq
->name
.s
;
25 seqObj
->comment
= kseq
->comment
.s
;
26 seqObj
->seq
= kseq
->seq
.s
;
27 NormalizeChrSeq(kseq
->seq
.s
); // to /[ATCGN]*/
28 if (rvalue
&2) { // withQ
31 seqObj
->qual
= kseq
->qual
.s
;
35 seqObj
->readlength
= seqlen
;
40 ssize_t
read_kseq_with2bit(SeqFileObj
* const seqObj
) {
41 size_t seqlen
; // in fact size_t, but minus values are meanful.
42 int_fast8_t rvalue
= kseq_read(seqObj
->fobj
);
44 uint_fast8_t type
= rvalue
; // 1 or 3. No need to &3 now.
48 seqObj
->name
= kseq
->name
.s
;
49 seqObj
->comment
= kseq
->comment
.s
;
50 seqObj
->seq
= kseq
->seq
.s
;
51 if (rvalue
&2) { // withQ
54 seqObj
->qual
= kseq
->qual
.s
;
58 size_t needtomallocQQW
= (seqlen
+31u)>>5; // 1 "QQWord" = 4 QWord = 32 bp. Well, I say there is QQW.
59 if (needtomallocQQW
> seqObj
->binMallocedQQWord
) {
60 KROUNDUP32(needtomallocQQW
);
61 seqObj
->binMallocedQQWord
= needtomallocQQW
;
62 seqObj
->diBseq
= realloc(seqObj
->diBseq
,needtomallocQQW
<<3); // 2^3=8
63 seqObj
->hexBQ
= realloc(seqObj
->hexBQ
,needtomallocQQW
<<5); // 4*2^3=32
65 seqObj
->binNcount
= base2dbit(seqlen
, kseq
->seq
.s
, seqObj
->qual
, seqObj
->diBseq
, seqObj
->hexBQ
);
66 // printf("-[%s]<%s><%zx>[%s]-\n",kseq->seq.s, qstr, seqObj->diBseq[0], unit2basechr(seqObj->diBseq[0]));
67 // Well, how to deal with smallcase masking ? Not using this information yet.
68 NormalizeChrSeq(kseq
->seq
.s
); // to /[ATCGN]*/
69 seqObj
->readlength
= seqlen
;
74 void close_kseq(SeqFileObj
* const seqObj
){
75 gzFile fp
= ((kseq_t
*)seqObj
->fobj
)->f
->f
;
77 kseq_destroy(seqObj
->fobj
);
80 // binmode: 1=base char, 2=base 2bit
81 SeqFileObj
* inSeqFinit(const char * const filename
, unsigned char binmode
) {
84 char FileID
[G_HEADER_LENGTH
];
85 fd
= open(filename
, O_RDONLY
);
86 if (fd
== -1) {warn("\n[x]Cannot open Sequence_file [%s]", filename
); return NULL
;}
87 fdstat
= read(fd
, FileID
, G_HEADER_LENGTH
);
88 if ( fdstat
!= G_HEADER_LENGTH
) {
89 warn("[!]Too short (%zd) for sequence_file [%s].", fdstat
, filename
);
94 warn("[!]ErrNo:[%zd] while closing file [%s].", fdstat
, filename
);
96 SeqFileObj
* const seqObj
= malloc(sizeof(SeqFileObj
)); // no more calloc(), just {.datePos[1]=0} is needed for kseq way.
97 //CANNOT use `&(SeqFileObj) {.datePos={0}};`, which is of a short lifetime !
99 if ( memcmp(G_TYPE_FAQC
, FileID
, G_HEADER_LENGTH
) ) { // Not G_TYPE_FAQC, thus to kseq.h
103 fp
= gzopen(filename
, "r");
104 if (! fp
) err(EXIT_FAILURE
, "\n[x]Cannot open with zlib for [%s]", filename
);
105 seq
= kseq_init(fp
); // calloc, thus safe
106 seqObj
->datePos
[0] = -1; // seeking not available here.
107 seqObj
->datePos
[1] = 0;
109 if (binmode
& GFIODIBBASE
) {
110 seqObj
->getNextSeq
= read_kseq_with2bit
; // (ssize_t (*)(void*))
112 seqObj
->getNextSeq
= read_kseq_no2bit
;
114 seqObj
->closefh
= close_kseq
;
115 seqObj
->diBseq
= NULL
; // We need NULL to free ...
116 seqObj
->hexBQ
= NULL
;
117 seqObj
->binMallocedQQWord
= 0;
118 seqObj
->binNcount
= 0;
119 } else { // is G_TYPE_FAQC
120 errx(EXIT_FAILURE
, "\n[:(]FAQC file not supported now.");
126 void inSeqFdestroy(SeqFileObj
* const seqObj
) {
127 (*seqObj
->closefh
)(seqObj
);
128 free(seqObj
->diBseq
);