1 #define _GNU_SOURCE // getline
3 #include <stdlib.h> //EXIT_FAILURE
12 char *strtrimr(char *pstr
)
16 while (isspace(pstr
[i
]) && (i
>= 0))
23 char *strtriml(char *pstr
)
27 while (isspace(pstr
[i
]) && (i
<= j
))
30 strcpy(pstr
, &pstr
[i
]);
36 char *strtrim(char *pstr
)
43 void write_example_cfg(const char * const filename
) {
45 FILE *fp
= fopen(filename
, "w");
46 if (fp
== NULL
) err(EXIT_FAILURE
, "Cannot write to [%s]", filename
);
48 # Estimated Genome Size in M bp\n\
51 # Heterozygosity rate\n\
54 # DNA sequence error rate\n\
60 # Mutation to extra kmer effective ratio (that less than 'k' or '2k-1')\n\
64 ./fullpath/to/fasta.fa\n\
65 ./fullpath/to/fastaq.fq.any.filename.is.OK\n\
66 ./fullpath/to/fasta.or.fastaq.gz\n\
69 err(EXIT_FAILURE
, "Error writing to [%s]", filename
);
73 SDLConfig
*read_SDL_cfg(const double dkmersize
, const char * const filename
){
74 SDLConfig
*psdlcfg
= malloc(sizeof(SDLConfig
));
75 psdlcfg
->fp
= fopen(filename
, "r");
76 psdlcfg
->seqfilename
=NULL
;
77 psdlcfg
->seqfilename_length
=0u;
78 if (psdlcfg
->fp
== NULL
) err(EXIT_FAILURE
, "Cannot open input_config [%s]", filename
);
82 unsigned char ConfigCount
=0;
83 while ((read
= getline(&line
, &len
, psdlcfg
->fp
)) != -1) {
84 if (*line
=='\n' || line
[0]=='#') continue;
85 if (line
[0]=='[' && line
[read
-2]==']') {
86 *(line
+(--read
))='\0'; //chop
89 if (*(line
+read
-1)=='\n') *(line
+(--read
))='\0'; // line[read-1] = '\0';
90 char *p2
= strchr(line
, '=');
91 if (p2
== NULL
) err(EXIT_FAILURE
, "Invalid file format:[%s]. See example input_config.", line
);
95 if (strcmp(line
,"Genome_Size")==0) {
96 psdlcfg
->GenomeSize
=atof(p2
);
97 if (psdlcfg
->GenomeSize
< 0)
98 errx(EXIT_FAILURE
, "Invalid Genome_Size:[%f].", psdlcfg
->GenomeSize
);
102 if (strcmp(line
,"Het")==0) {
103 psdlcfg
->HetRatio
=atof(p2
);
104 if (psdlcfg
->HetRatio
< 0 || psdlcfg
->HetRatio
> 1)
105 errx(EXIT_FAILURE
, "Invalid Het:[%f].", psdlcfg
->HetRatio
);
109 if (strcmp(line
,"Seq_Err")==0) {
110 psdlcfg
->SeqErrRate
=atof(p2
);
111 if (psdlcfg
->SeqErrRate
< 0 || psdlcfg
->SeqErrRate
> 1)
112 errx(EXIT_FAILURE
, "Invalid Seq_Err:[%f].", psdlcfg
->SeqErrRate
);
116 if (strcmp(line
,"Seq_Depth")==0) {
117 psdlcfg
->SeqDepth
=atof(p2
);
118 if (psdlcfg
->SeqDepth
< 0)
119 errx(EXIT_FAILURE
, "Invalid Seq_Depth:[%f].", psdlcfg
->SeqDepth
);
123 if (strcmp(line
,"Mut_Effect")==0) {
124 psdlcfg
->MutEffect
=atof(p2
);
125 if (psdlcfg
->MutEffect
< 0 || psdlcfg
->MutEffect
> 1)
126 errx(EXIT_FAILURE
, "Invalid Mut_Effect:[%f].", psdlcfg
->MutEffect
);
131 if (SDLConfig_Count
!= ConfigCount
)
132 errx(EXIT_FAILURE
, "Invalid file format:(%u != %u). See example input_config.", ConfigCount
,SDLConfig_Count
);
133 if (strcmp(line
,"[Sequence Files]")) {
134 errx(EXIT_FAILURE
, "The first section is \"%s\", not \"[Sequence Files]\". See example input_config.", line
);
138 calculate_SDL_cfg(dkmersize
, psdlcfg
);
141 void calculate_SDL_cfg(const double dkmersize
, SDLConfig
*psdlcfg
){
142 double HetX
=2.0*dkmersize
*psdlcfg
->HetRatio
-1.0;
143 if (HetX
>1.0) HetX
=1.0;
144 double ErrX
=dkmersize
*psdlcfg
->SeqErrRate
;
145 if (ErrX
>1.0) ErrX
=1.0;
146 double TotalKmerX
=(1.0+HetX
)*(1.0 + psdlcfg
->SeqDepth
*ErrX
);
147 if (TotalKmerX
> psdlcfg
->SeqDepth
*dkmersize
)
148 TotalKmerX
= psdlcfg
->SeqDepth
*dkmersize
;
149 psdlcfg
->MaxKmerCount
= TotalKmerX
* psdlcfg
->GenomeSize
;
152 ssize_t
get_next_seqfile(SDLConfig
* const psdlcfg
){
154 while ( (read
= getline(&(psdlcfg
->seqfilename
), &(psdlcfg
->seqfilename_length
), psdlcfg
->fp
)) != -1) {
155 if (*(psdlcfg
->seqfilename
)=='\n') continue;
156 if (*((psdlcfg
->seqfilename
)+read
-1)=='\n') *((psdlcfg
->seqfilename
)+(--read
))='\0';
159 return read
; // return -1;
162 void destory_seqfile(SDLConfig
* psdlcfg
){
164 free(psdlcfg
->seqfilename
);