1 //本程序根据参考序列上mapped的reads来计算测序样本的genotype, 读取bwa mpielup格式输入文件
2 //依据bayes theory,以sample出现纯合和杂合SNP的概率和测序错误分值模型为先验概率,计算观测数据下每一种genotype的后验概率。
3 //测序错误分值模型,与模拟reads所用的模型相同,由分析重测序数据而得。它是一个四维矩阵:ref_base x cycle x seq_base x quality。
4 //解释:我们实际想得到的是P(D|genotype), 此处ref_base即为geotype, 而D包含三个属性(cycle,seq_base,quality)。
5 //我们想得到是P(cycle,seq_base,quality|ref_base), 而它又等价于P(cycle|ref_base) * P(seq_base,quality|cycle,ref_base)。
6 //因为P(cycle|ref_base)可以认为是一个常数,在整个bayes公式的计算中不起作用,因此我们把它忽略。
7 //这样可以将P(seq_base,quality|cycle,ref_base)直接用于bayes公式,也就形成了ref_base*cycle作为列,seq_base*quality作为行的二维matrix文件。
8 //本程序采用long double (128bit浮点数),来高精度的表示概率。
9 //coverage depth最大允许值为400,以防计算过程中数值指数部分超过long double界限。超过400的部分将被忽略,程序不报错不中止。
10 //选取概率最大的genotype作为最终结果,并将其错误率转换为phred quality score, 并设上限为100。
12 //pirs和soapsnp的matrix是等效的,为保持一致,此程序采用soapsnp格式的matrix, P(base|ref,qual,cycle)
14 //Author: Fan Wei, fanweisz09@gmail.com
29 #include <boost/lexical_cast.hpp>
30 #include <boost/algorithm/string.hpp>
35 typedef long double rate_t
;
36 rate_t PTi
[4][10]; //store the prior probability for each substitutions
37 rate_t
*PdTi
; //store the distribution of error and quality
45 int Ref_Cyc_base_qual
;
46 string Error_profile_file
;
47 string Out_prefix
= "output";
48 float Trans_Tranv_ratio
= 2.0;
49 float Hom_SNP_rate
= 0.0005;
50 float Het_SNP_rate
= 0.0005;
51 int SNP_prior_mode
= 1;
52 int Ascii_Qual_Start
= 33;
53 int Only_output_snp
= 1;
57 'A', 'C', 'G', 'T', 'N'
60 //用0,1,2,3,分别代表A,C,G,T
61 int DipGeno
[10][2] = {0,0, 1,1, 2,2, 3,3, 0,1, 0,2, 0,3, 1,2, 1,3, 2,3};
63 map
<int,char> TwoBases
;
64 map
<int, int> TransSubs
;
67 void define_TransSubs()
69 TransSubs
[0] = 2; TransSubs
[2] = 0;
70 TransSubs
[1] = 3; TransSubs
[3] = 1;
74 void define_two_bases_markers()
77 TwoBases
[0] = 'A'; TwoBases
[1] = 'C'; TwoBases
[2] = 'G'; TwoBases
[3] = 'T';
78 TwoBases
[4] = 'M'; TwoBases
[5] = 'R'; TwoBases
[6] = 'W';
79 TwoBases
[7] = 'S'; TwoBases
[8] = 'Y'; TwoBases
[9] = 'K';
84 { cout
<< "calculate_diploid_genotype <*.mpileup>" << endl
;
85 cout
<< " version 1.0" << endl
;
86 cout
<< " -r <float> homozygous SNP prior rate, default=" << Hom_SNP_rate
<< endl
;
87 cout
<< " -e <float> heterozygous SNP prior rate, default=" << Het_SNP_rate
<< endl
;
88 cout
<< " -t <float> transition vs transversion prior ratio, default=" << Trans_Tranv_ratio
<< endl
;
89 cout
<< " -d <int> set the prior mode, 0: no prior; 1: with prior; default="<< SNP_prior_mode
<< endl
;
90 cout
<< " -m <file> matrix file of error and quality distributions, default=exe_path/XieSunney.blood.matrix.soapsnp" << endl
;
91 cout
<< " -q <int> ASCII chracter standing for quality==0, default=" << Ascii_Qual_Start
<< endl
;
92 cout
<< " -o <string> prefix for the output files" << endl
;
93 cout
<< " -s <int> two output mode, 1, only snp; 0, cns; default=" << Only_output_snp
<< endl
;
94 cout
<< " -h get help information" << endl
;
100 int get_base (char refBase
)
105 case 'A': refBaseId
= 0; break;
106 case 'C': refBaseId
= 1; break;
107 case 'G': refBaseId
= 2; break;
108 case 'T': refBaseId
= 3; break;
109 case 'a': refBaseId
= 0; break;
110 case 'c': refBaseId
= 1; break;
111 case 'g': refBaseId
= 2; break;
112 case 't': refBaseId
= 3; break;
113 default: refBaseId
= 4;
119 //限定freq_rate在1到1e-9之间,统计样本量级1G
120 void load_soapsnp_error_profile (string
&matrix_file
)
123 infile
.open(matrix_file
.c_str());
125 { cerr
<< "fail to open input file" << matrix_file
<< endl
;
129 int pi
= 0; //PdTi的循环变量
130 int line_field_number
= 18;
131 vector
<string
> textlines
;
134 while (getline( infile
, lineStr
, '\n' ))
135 { if (lineStr
[0] == '#')
139 vector
<string
> lineVec
;
140 boost::split(lineVec
,lineStr
, boost::is_any_of(" \t\n"), boost::token_compress_on
);
141 if (lineVec
.size() != line_field_number
)
145 Quality_num
= boost::lexical_cast
<int>(lineVec
[0]) + 1;
146 Cycle_num
= boost::lexical_cast
<int>(lineVec
[1]) + 1;
148 textlines
.push_back(lineStr
);
153 Ref_Cyc_base_qual
= Ref_Base_num
* Seq_Base_num
* Quality_num
* Cycle_num
;
154 Qual_add_val
= Ref_Base_num
* Seq_Base_num
* Cycle_num
;
155 Cycle_add_val
= Ref_Base_num
* Seq_Base_num
;
156 Ref_add_val
= Seq_Base_num
;
158 PdTi
= new rate_t
[Ref_Cyc_base_qual
];
159 for (int j
=0; j
<textlines
.size(); j
++)
161 vector
<string
> lineVec
;
162 boost::split(lineVec
,textlines
[j
], boost::is_any_of(" \t\n"), boost::token_compress_on
);
163 int this_qual
= boost::lexical_cast
<int>(lineVec
[0]);
164 int this_cycl
= boost::lexical_cast
<int>(lineVec
[1]);
166 for (int i
=2; i
<line_field_number
; i
++)
168 int this_ref
= k
/ Seq_Base_num
;
169 int this_base
= k
% Seq_Base_num
;
170 pi
= this_qual
*Qual_add_val
+ this_cycl
*Cycle_add_val
+ this_ref
*Ref_add_val
+ this_base
;
171 rate_t freq_rate
= boost::lexical_cast
<rate_t
>(lineVec
[i
]);
173 if (pi
< Ref_Cyc_base_qual
)
174 { PdTi
[pi
] = (freq_rate
> 1e-9L) ? freq_rate
: 1e-9L;
176 cerr
<< "pi exceeds its range " << Ref_Cyc_base_qual
<< endl
;
185 //根据Trans_Tranv_ratio,Hom_SNP_rate,Het_SNP_rate,SNP_prior_mode来计算genotype的prior probability
186 void assign_PTi_rates ()
188 double TsTvR
= Trans_Tranv_ratio
;
189 double NonSnpRate
= 1 - Hom_SNP_rate
- Het_SNP_rate
;
190 double hom_trans_rate
= Hom_SNP_rate
* TsTvR
*2 / (TsTvR
*2 + 1 + 1);
191 double hom_tranv_rate
= Hom_SNP_rate
* 1 / (TsTvR
*2 + 1 + 1);
193 double HapSnpRate
= Hom_SNP_rate
+ Het_SNP_rate
; //近似,非准确
194 double HapNonSnpRate
= 1 - HapSnpRate
;
195 double HapTransSnpRate
= HapSnpRate
* TsTvR
*2 / (TsTvR
*2 + 1 + 1);
196 double HapTranvSnpRate
= HapSnpRate
* 1 / (TsTvR
*2 + 1 + 1);
198 double hetNonTransRate
;
199 double hetNonTranvRate
;
200 double hetTransTranvRate
;
201 double hetTranvTranvRate
;
202 double relativeTotal
;
204 relativeTotal
= HapNonSnpRate
*HapTransSnpRate
* 1 + HapNonSnpRate
*HapTranvSnpRate
*2 + HapTransSnpRate
*HapTranvSnpRate
* 2 + HapTranvSnpRate
*HapTranvSnpRate
* 1;
205 hetNonTransRate
= (relativeTotal
> 0) ? HapNonSnpRate
*HapTransSnpRate
/ relativeTotal
* Het_SNP_rate
: 0;
206 hetNonTranvRate
= (relativeTotal
> 0) ? HapNonSnpRate
*HapTranvSnpRate
/ relativeTotal
* Het_SNP_rate
: 0;
207 hetTransTranvRate
= (relativeTotal
> 0) ? HapTransSnpRate
*HapTranvSnpRate
/ relativeTotal
* Het_SNP_rate
: 0;
208 hetTranvTranvRate
= (relativeTotal
> 0) ? HapTranvSnpRate
*HapTranvSnpRate
/ relativeTotal
* Het_SNP_rate
: 0;
210 if (SNP_prior_mode
== 0)
211 { NonSnpRate
= hom_trans_rate
= hom_tranv_rate
= hetNonTransRate
= hetNonTranvRate
= hetTransTranvRate
= hetTranvTranvRate
= 0.1;
214 for (int i
=0; i
<4; i
++)
215 { for (int j
=0; j
<10; j
++)
216 { if (i
== DipGeno
[j
][0] && i
== DipGeno
[j
][1]) // non-SNP
218 PTi
[i
][j
] = NonSnpRate
;
219 }else if(i
!= DipGeno
[j
][0] && DipGeno
[j
][0] == DipGeno
[j
][1]) //homo SNP
221 PTi
[i
][j
] = (TransSubs
[i
] == DipGeno
[j
][0]) ? hom_trans_rate
: hom_tranv_rate
;
223 { if (i
== DipGeno
[j
][0] || i
== DipGeno
[j
][1]) //het-one
224 { PTi
[i
][j
] = (TransSubs
[i
] == DipGeno
[j
][0] || TransSubs
[i
] == DipGeno
[j
][1]) ? hetNonTransRate
: hetNonTranvRate
;
226 { PTi
[i
][j
] = (TransSubs
[i
] != DipGeno
[j
][0] && TransSubs
[i
] != DipGeno
[j
][1]) ? hetTranvTranvRate
: hetTransTranvRate
;
234 //从pileup格式的base_str中得到干净的bases信息
235 void get_clean_bases(int refBase
, string
&raw_base_str
,vector
<int> &clean_base_vec
)
238 int len
= raw_base_str
.size();
241 if (raw_base_str
[pos
] == '^')
244 else if (raw_base_str
[pos
] == '+' || raw_base_str
[pos
] == '-')
248 { if (raw_base_str
[pos
] >= 48 && raw_base_str
[pos
] <= 57 ) //ASCII: 0-9
249 { indel_num
.push_back(raw_base_str
[pos
]);
256 pos
+= atoi(indel_num
.c_str());
260 switch (raw_base_str
[pos
])
262 case '.': clean_base_vec
.push_back(refBase
); break;
263 case ',': clean_base_vec
.push_back(refBase
); break;
264 case 'A': clean_base_vec
.push_back(0); break;
265 case 'a': clean_base_vec
.push_back(0); break;
266 case 'C': clean_base_vec
.push_back(1); break;
267 case 'c': clean_base_vec
.push_back(1); break;
268 case 'G': clean_base_vec
.push_back(2); break;
269 case 'g': clean_base_vec
.push_back(2); break;
270 case 'T': clean_base_vec
.push_back(3); break;
271 case 't': clean_base_vec
.push_back(3); break;
272 case 'N': clean_base_vec
.push_back(4); break;
273 case 'n': clean_base_vec
.push_back(4); break;
274 case '*': clean_base_vec
.push_back(4); break;
275 case '>': clean_base_vec
.push_back(4); break;
276 case '<': clean_base_vec
.push_back(4); break;
286 //从pileup格式的quality_str中得到quality信息,并转换成phred-scale, 33开始
287 void get_quality(vector
<int> &qual_vec
, string
&qual_str
)
289 for (int i
=0; i
<qual_str
.size(); i
++)
290 { int qual_val
= qual_str
[i
] - Ascii_Qual_Start
;
291 qual_val
= (qual_val
< Quality_num
) ? qual_val
: Quality_num
- 1; //质量值大于40的全部当成40
292 qual_vec
.push_back(qual_val
);
296 //从pileup格式的cycle_str中得到cycle信息
297 void get_cycle (vector
<int> &cycl_vec
, string
&cycl_str
)
299 vector
<string
> temp_vec
;
300 boost::split(temp_vec
,cycl_str
, boost::is_any_of(","), boost::token_compress_on
);
301 for (int i
=0; i
<temp_vec
.size(); i
++)
302 { //int cycle_val = boost::lexical_cast<int>(temp_vec[i]); //runs much slow
303 int cycle_val
= atoi(temp_vec
[i
].c_str()); //runs fast
304 if (cycle_val
> Cycle_num
)
305 { cycle_val
= Cycle_num
;
307 cycl_vec
.push_back(cycle_val
- 1); //cycle 0-99代表100个cycles
312 //根据指定的参考序列碱基,计算每一种genotype的后验概率P(Ti|D)
313 //coverage depth最大允许值为400,以防计算过程中数值指数部分超过long double界限
314 void calculte_genotype_probability(int refBase
, vector
<rate_t
> &pTiDvec
, vector
<int> &baseVec
, vector
<int> &qualVec
, vector
<int> &cyclVec
)
316 //cerr << "refbase: " << refBase << " " << Bases[refBase] << endl;
318 int len
= (baseVec
.size() < 400) ? baseVec
.size() : 400;
320 //vector<rate_t> pDTiVec;
321 for (int genotype
=0; genotype
<10; genotype
++)
323 rate_t pTi
= PTi
[refBase
][genotype
]; //
327 { for (int j
=0; j
<len
; j
++)
329 int num1
= qualVec
[j
]*Qual_add_val
+ cyclVec
[j
]*Cycle_add_val
+ DipGeno
[genotype
][0]*Ref_add_val
+ + baseVec
[j
];
330 int num2
= qualVec
[j
]*Qual_add_val
+ cyclVec
[j
]*Cycle_add_val
+ DipGeno
[genotype
][1]*Ref_add_val
+ + baseVec
[j
];
331 if (num1
>= Ref_Cyc_base_qual
|| num2
>= Ref_Cyc_base_qual
)
332 { cerr
<< "num1 or num2 exceeds its range " << num1
<< "\t" << num2
<< endl
;
335 pDTi
*= (PdTi
[num1
] + PdTi
[num2
] ) / 2;
336 //cerr << "# " << DipGeno[genotype][0] << "\t" << DipGeno[genotype][1] << "\t" << cyclVec[j] << "\t" << baseVec[j] << "\t" << qualVec[j] << "\t"<< PdTi[num1] << "\t" << PdTi[num2] << "\t" << num1 << "\t" << num2 << endl;
340 //pDTiVec.push_back(pDTi);
341 pTiDvec
.push_back( pTi
*pDTi
);
343 //cerr << "calculate: " << genotype << "\t" << pTi << "\t" << pDTi << "\t" << pTi*pDTi << "\t" << pD << endl;
346 for (int genotype
=0; genotype
<10; genotype
++)
347 { pTiDvec
[genotype
] = pTiDvec
[genotype
] / pD
;
350 //output middle results
351 /*for (int genotype=0; genotype<10; genotype++)
352 { rate_t pTi = PTi[refBase][genotype];
353 rate_t pDTi = pDTiVec[genotype];
354 cout << genotype << "\t" << pTi << "\t" << pDTi << "\t" << pTi*pDTi << "\t" << pD << "\t" << pTiDvec[genotype] << endl;
358 //find the genotype with maximum posterior likelihood and calcualte the phred-scale score (up to 100)
359 void find_genotype_score(vector
<rate_t
> &pTiDvec
, int &sample_genotype
, int &phred_score
)
362 rate_t max_likelihood
= 0.0;
363 for (int i
=0; i
<pTiDvec
.size(); i
++)
364 { if (pTiDvec
[i
] > max_likelihood
)
366 max_likelihood
= pTiDvec
[i
];
369 //cerr << "find max: " << i << " " << pTiDvec[i] << endl;
372 sample_genotype
= max_id
;
374 rate_t error_rate
= 1 - pTiDvec
[max_id
];
375 if (error_rate
< 1e-10L)
376 { error_rate
= 1e-10L;
378 phred_score
= int(-10 * log10(error_rate
));
382 void output_prior_file(string
&prior_file
)
384 ofstream
PRIOR (prior_file
.c_str());
386 { cerr
<< "fail create prior file" << prior_file
<< endl
;
389 PRIOR
<< "The 10 genotypes: \n";
390 for (int i
=0; i
<10; i
++)
391 { PRIOR
<< i
<< "(" << Bases
[DipGeno
[i
][0]] << Bases
[DipGeno
[i
][1]] << ") ";
393 PRIOR
<< endl
<< endl
;
396 PRIOR
<< "The compressed representaiton for each genotype:\n";
397 map
<int,char>::const_iterator map_it
= TwoBases
.begin();
398 while (map_it
!= TwoBases
.end())
399 { PRIOR
<< map_it
->first
<< ":" << map_it
->second
<< "; ";
404 PRIOR
<< "The transison substitution matrix:\n";
405 map
<int, int>::const_iterator map_it2
= TransSubs
.begin();
406 while (map_it2
!= TransSubs
.end())
407 { PRIOR
<< Bases
[map_it2
->first
] << ":" << Bases
[map_it2
->second
] << "; ";
410 PRIOR
<< endl
<< endl
;
413 PRIOR
<< "Hom_SNP_rate: " << Hom_SNP_rate
<< endl
;
414 PRIOR
<< "Het_SNP_rate: " << Het_SNP_rate
<< endl
;
415 PRIOR
<< "All_SNP_rate: " << Hom_SNP_rate
+ Het_SNP_rate
<< endl
;
416 PRIOR
<< "Trans_Tranv_ratio: " << Trans_Tranv_ratio
<< endl
;
418 PRIOR
<< "\nThe prior probabilites of genotypes for each reference base:\n\n";
420 for (int j
=0; j
<10; j
++)
421 { PRIOR
<< "\t" << Bases
[DipGeno
[j
][0]] << Bases
[DipGeno
[j
][1]];
424 for (int i
=0; i
<4; i
++)
425 { PRIOR
<< Bases
[i
] ;
426 for (int j
=0; j
<10; j
++)
428 PRIOR
<< "\t" << PTi
[i
][j
];
434 PRIOR
<< "\n Ref_Base_num " << Ref_Base_num
<< endl
;
435 PRIOR
<< " Cycle_num " << Cycle_num
<< endl
;
436 PRIOR
<< " Seq_Base_num " << Seq_Base_num
<< endl
;
437 PRIOR
<< " Quality_num " << Quality_num
<< endl
;
438 PRIOR
<< " PdTi array size: " << Ref_Cyc_base_qual
<< endl
;
439 PRIOR
<< " Qual_add_val " << Qual_add_val
<< endl
;
440 PRIOR
<< " Cycle_add_val " << Cycle_add_val
<< endl
;
441 PRIOR
<< " Ref_add_val " << Ref_add_val
<< endl
;
445 int main(int argc
, char *argv
[])
447 //get options from command line
449 while((c
=getopt(argc
, argv
, "t:r:e:d:m:q:o:s:h")) !=-1) {
451 case 't': Trans_Tranv_ratio
=atof(optarg
); break;
452 case 'r': Hom_SNP_rate
=atof(optarg
); break;
453 case 'e': Het_SNP_rate
=atof(optarg
); break;
454 case 'd': SNP_prior_mode
=atoi(optarg
); break;
455 case 'm': Error_profile_file
=optarg
; break;
456 case 'q': Ascii_Qual_Start
=atoi(optarg
); break;
457 case 'o': Out_prefix
=optarg
; break;
458 case 's': Only_output_snp
=atoi(optarg
); break;
459 case 'h': usage(); break;
463 if (argc
< 2) usage();
465 if (! Error_profile_file
.size())
466 { string exepath
= argv
[0];
467 int numh
= exepath
.rfind("/");
468 Error_profile_file
= exepath
.substr(0,numh
+1) + "XieSunney.blood.matrix.soapsnp";
470 cerr
<< "\nUsed error profile matrix file: " << Error_profile_file
<< endl
<< endl
;
472 string pileup_file
= argv
[optind
++]; //optind, argv[optind++]顺序指向非option的参数
474 clock_t time_start
, time_end
;
475 time_start
= clock();
477 //caluate the prior probability of each genotype
478 define_two_bases_markers();
481 cerr
<< "Assign prior probability to genotype done:\n\n";
483 //load the error and quality profile matrix into memory
484 load_soapsnp_error_profile(Error_profile_file
);
485 cerr
<< "Dimensions of error profile:\n";
487 string prior_file
= Out_prefix
+ ".prior";
488 output_prior_file(prior_file
);
491 cerr
<< "\nLoad the error profile done\n";
492 cerr
<< "Run time: " << double(time_end
- time_start
) / CLOCKS_PER_SEC
<< endl
;
494 //parse the pileup file, and calculate the likelihood of each genotypes
496 infile
.open(pileup_file
.c_str());
498 { cerr
<< "fail to open input file" << pileup_file
<< endl
;
501 string gentoytpe_file
;
502 if (Only_output_snp
== 1)
503 { gentoytpe_file
= Out_prefix
+ ".snp";
505 { gentoytpe_file
= Out_prefix
+ ".cns";
507 ofstream
outfile (gentoytpe_file
.c_str());
509 { cerr
<< "fail to open output file" << gentoytpe_file
<< endl
;
513 outfile
<< "#Chr\tPos\tRef\tGeno\tScore\tDepth\tBases\tQuals\tCycles\t[all mapped bases]\n";
515 while (getline( infile
, lineStr
, '\n' ))
517 vector
<string
> lineVec
;
518 boost::split(lineVec
,lineStr
, boost::is_any_of("\t"), boost::token_compress_on
);
519 int refBase
= get_base(lineVec
[2][0]);
522 get_clean_bases(refBase
,lineVec
[4],baseVec
);
525 get_quality(qualVec
, lineVec
[5]);
528 get_cycle(cyclVec
, lineVec
[7]);
530 ////////////////debug///////////////
531 //cerr << "refBase: " << Bases[refBase] << endl;
532 //for (int i=0; i<baseVec.size(); i++)
533 //{ cerr << i+1 << "\t" << Bases[baseVec[i]] << "\t" << qualVec[i] << "\t" << cyclVec[i] << endl;
536 ////////////////debug///////////////
538 //check the result of parsing pileup file
539 if (baseVec
.size() != qualVec
.size() || baseVec
.size() != cyclVec
.size())
540 { cerr
<< "Error happens at : " << lineVec
[4] << "\t" << lineVec
[5] << "\t" << lineVec
[7] << endl
<< endl
;
544 vector
<int> FbaseVec
;
545 vector
<int> FqualVec
;
546 vector
<int> FcyclVec
;
548 //filter the observed data, base not N, and qual > 5;
549 for (int i
=0; i
<baseVec
.size(); i
++)
550 { if (baseVec
[i
] != 4 && qualVec
[i
] >= 5) //当质量值小于5的时候,P(D|G)几乎是相等的,因此P(G|D) = P(G), 数据没有发挥作用,结果将只反映先验概率,造成错误。
551 { FbaseVec
.push_back(baseVec
[i
]);
552 FqualVec
.push_back(qualVec
[i
]);
553 FcyclVec
.push_back(cyclVec
[i
]);
556 if (FbaseVec
.size() == 0)
560 vector
<rate_t
> pTiDvec
;
561 calculte_genotype_probability(refBase
,pTiDvec
, FbaseVec
, FqualVec
, FcyclVec
);
562 int sample_genotype
= 0;
564 find_genotype_score(pTiDvec
,sample_genotype
,phred_score
);
565 if (Only_output_snp
== 0 || sample_genotype
!= refBase
)
568 outfile
<< lineVec
[0] << "\t" << lineVec
[1] << "\t" << lineVec
[2] << "\t";
569 if (sample_genotype
< 4)
570 { outfile
<< Bases
[sample_genotype
];
572 outfile
<< TwoBases
[sample_genotype
] << "(" << Bases
[DipGeno
[sample_genotype
][0]] << Bases
[DipGeno
[sample_genotype
][1]] << ")";
575 outfile
<< "\t" << phred_score
<< "\t" << FbaseVec
.size() << "\t" ;
576 for (int i
=0; i
<FbaseVec
.size(); i
++)
577 { outfile
<< Bases
[FbaseVec
[i
]];
580 for (int i
=0; i
<FbaseVec
.size(); i
++)
581 { outfile
<< FqualVec
[i
] << ",";
584 for (int i
=0; i
<FbaseVec
.size(); i
++)
585 { outfile
<< FcyclVec
[i
] << ",";
587 outfile
<< "\t" << baseVec
.size() << "\t"<< lineVec
[4] << "\t" << lineVec
[5] << "\t" << lineVec
[7] << endl
;
600 cerr
<< "\nParse the pileup file and genotype calculation done\n";
601 cerr
<< "Run time: " << double(time_end
- time_start
) / CLOCKS_PER_SEC
<< endl
;