modified: Makefile
[GalaxyCodeBases.git] / c_cpp / readscorr / cfgparser.c
blob5fe689a7d16d9809cdf2075df8f84db9ab33fb56
1 #define _GNU_SOURCE // getline
2 #include <stdio.h>
3 #include <stdlib.h> //EXIT_FAILURE
4 #include <err.h>
5 #include <string.h>
6 #include <ctype.h>
7 #include "cfgparser.h"
9 /*
10 *去除字符串右端空格
12 char *strtrimr(char *pstr)
14 int i;
15 i = strlen(pstr) - 1;
16 while (isspace(pstr[i]) && (i >= 0))
17 pstr[i--] = '\0';
18 return pstr;
21 *去除字符串左端空格
23 char *strtriml(char *pstr)
25 int i = 0,j;
26 j = strlen(pstr) - 1;
27 while (isspace(pstr[i]) && (i <= j))
28 i++;
29 if (0<i)
30 strcpy(pstr, &pstr[i]);
31 return pstr;
34 *去除字符串两端空格
36 char *strtrim(char *pstr)
38 char *p;
39 p = strtrimr(pstr);
40 return strtriml(p);
43 void write_example_cfg(const char * const filename) {
44 fputs(".\b", stderr);
45 FILE *fp = fopen(filename, "w");
46 if (fp == NULL) err(EXIT_FAILURE, "Cannot write to [%s]", filename);
47 fputs("\
48 # Estimated Genome Size in M bp\n\
49 Genome_Size=3000\n\
50 \n\
51 # Heterozygosity rate\n\
52 Het=0.01\n\
53 \n\
54 # DNA sequence error rate\n\
55 Seq_Err=0.02\n\
56 \n\
57 # Sequence Depth\n\
58 Seq_Depth=20\n\
59 \n\
60 # Mutation to extra kmer effective ratio (that less than 'k' or '2k-1')\n\
61 Mut_Effect=0.8\n\
62 \n\
63 [Sequence Files]\n\
64 ./fullpath/to/fasta.fa\n\
65 ./fullpath/to/fastaq.fq.any.filename.is.OK\n\
66 ./fullpath/to/fasta.or.fastaq.gz\n\
67 ", fp);
68 if ( fclose(fp) )
69 err(EXIT_FAILURE, "Error writing to [%s]", filename);
70 fputs("\n", stderr);
73 SDLConfig *read_SDL_cfg(const double dkmersize, const char * const filename){
74 SDLConfig *psdlcfg = malloc(sizeof(SDLConfig));
75 psdlcfg->fp = fopen(filename, "r");
76 psdlcfg->seqfilename=NULL;
77 psdlcfg->seqfilename_length=0u;
78 if (psdlcfg->fp == NULL) err(EXIT_FAILURE, "Cannot open input_config [%s]", filename);
79 char *line = NULL;
80 size_t len = 0;
81 ssize_t read;
82 unsigned char ConfigCount=0;
83 while ((read = getline(&line, &len, psdlcfg->fp)) != -1) {
84 if (*line=='\n' || line[0]=='#') continue;
85 if (line[0]=='[' && line[read-2]==']') {
86 *(line+(--read))='\0'; //chop
87 break;
89 if (*(line+read-1)=='\n') *(line+(--read))='\0'; // line[read-1] = '\0';
90 char *p2 = strchr(line, '=');
91 if (p2 == NULL) err(EXIT_FAILURE, "Invalid file format:[%s]. See example input_config.", line);
92 *p2++ = '\0';
93 p2 = strtrim(p2);
94 line = strtrim(line);
95 if (strcmp(line,"Genome_Size")==0) {
96 psdlcfg->GenomeSize=atof(p2);
97 if (psdlcfg->GenomeSize < 0)
98 errx(EXIT_FAILURE, "Invalid Genome_Size:[%f].", psdlcfg->GenomeSize);
99 ++ConfigCount;
100 continue;
102 if (strcmp(line,"Het")==0) {
103 psdlcfg->HetRatio=atof(p2);
104 if (psdlcfg->HetRatio < 0 || psdlcfg->HetRatio > 1)
105 errx(EXIT_FAILURE, "Invalid Het:[%f].", psdlcfg->HetRatio);
106 ++ConfigCount;
107 continue;
109 if (strcmp(line,"Seq_Err")==0) {
110 psdlcfg->SeqErrRate=atof(p2);
111 if (psdlcfg->SeqErrRate < 0 || psdlcfg->SeqErrRate > 1)
112 errx(EXIT_FAILURE, "Invalid Seq_Err:[%f].", psdlcfg->SeqErrRate);
113 ++ConfigCount;
114 continue;
116 if (strcmp(line,"Seq_Depth")==0) {
117 psdlcfg->SeqDepth=atof(p2);
118 if (psdlcfg->SeqDepth < 0)
119 errx(EXIT_FAILURE, "Invalid Seq_Depth:[%f].", psdlcfg->SeqDepth);
120 ++ConfigCount;
121 continue;
123 if (strcmp(line,"Mut_Effect")==0) {
124 psdlcfg->MutEffect=atof(p2);
125 if (psdlcfg->MutEffect < 0 || psdlcfg->MutEffect > 1)
126 errx(EXIT_FAILURE, "Invalid Mut_Effect:[%f].", psdlcfg->MutEffect);
127 ++ConfigCount;
128 continue;
131 if (SDLConfig_Count != ConfigCount)
132 errx(EXIT_FAILURE, "Invalid file format:(%u != %u). See example input_config.", ConfigCount,SDLConfig_Count);
133 if (strcmp(line,"[Sequence Files]")) {
134 errx(EXIT_FAILURE, "The first section is \"%s\", not \"[Sequence Files]\". See example input_config.", line);
136 free(line);
137 //calculate
138 calculate_SDL_cfg(dkmersize, psdlcfg);
139 return psdlcfg;
141 void calculate_SDL_cfg(const double dkmersize, SDLConfig *psdlcfg){
142 double HetX=2.0*dkmersize*psdlcfg->HetRatio-1.0;
143 if (HetX>1.0) HetX=1.0;
144 double ErrX=dkmersize*psdlcfg->SeqErrRate;
145 if (ErrX>1.0) ErrX=1.0;
146 double TotalKmerX=(1.0+HetX)*(1.0 + psdlcfg->SeqDepth*ErrX);
147 if (TotalKmerX > psdlcfg->SeqDepth*dkmersize)
148 TotalKmerX = psdlcfg->SeqDepth*dkmersize;
149 psdlcfg->MaxKmerCount = TotalKmerX * psdlcfg->GenomeSize;
152 ssize_t get_next_seqfile(SDLConfig * const psdlcfg){
153 ssize_t read;
154 while ( (read = getline(&(psdlcfg->seqfilename), &(psdlcfg->seqfilename_length), psdlcfg->fp)) != -1) {
155 if (*(psdlcfg->seqfilename)=='\n') continue;
156 if (*((psdlcfg->seqfilename)+read-1)=='\n') *((psdlcfg->seqfilename)+(--read))='\0';
157 return read;
159 return read; // return -1;
162 void destory_seqfile(SDLConfig * psdlcfg){
163 fclose(psdlcfg->fp);
164 free(psdlcfg->seqfilename);
165 free(psdlcfg);