new file: pixi.toml
[GalaxyCodeBases.git] / projects / recombineMap / sperm_by_fw / snp_calling / call_diploid_snp.cpp
blob7b583fab36fa0dd06a5caa4e018d3d81609fe0d6
1 //本程序根据参考序列上mapped的reads来计算测序样本的genotype, 读取bwa mpielup格式输入文件
2 //依据bayes theory,以sample出现纯合和杂合SNP的概率和测序错误分值模型为先验概率,计算观测数据下每一种genotype的后验概率。
3 //测序错误分值模型,与模拟reads所用的模型相同,由分析重测序数据而得。它是一个四维矩阵:ref_base x cycle x seq_base x quality。
4 //解释:我们实际想得到的是P(D|genotype), 此处ref_base即为geotype, 而D包含三个属性(cycle,seq_base,quality)。
5 //我们想得到是P(cycle,seq_base,quality|ref_base), 而它又等价于P(cycle|ref_base) * P(seq_base,quality|cycle,ref_base)。
6 //因为P(cycle|ref_base)可以认为是一个常数,在整个bayes公式的计算中不起作用,因此我们把它忽略。
7 //这样可以将P(seq_base,quality|cycle,ref_base)直接用于bayes公式,也就形成了ref_base*cycle作为列,seq_base*quality作为行的二维matrix文件。
8 //本程序采用long double (128bit浮点数),来高精度的表示概率。
9 //coverage depth最大允许值为400,以防计算过程中数值指数部分超过long double界限。超过400的部分将被忽略,程序不报错不中止。
10 //选取概率最大的genotype作为最终结果,并将其错误率转换为phred quality score, 并设上限为100。
12 //pirs和soapsnp的matrix是等效的,为保持一致,此程序采用soapsnp格式的matrix, P(base|ref,qual,cycle)
14 //Author: Fan Wei, fanweisz09@gmail.com
15 //Date: 2012/6/4
18 #include <stdio.h>
19 #include <iostream>
20 #include <fstream>
21 #include <string>
22 #include <vector>
23 #include <algorithm>
24 #include <map>
25 #include <set>
26 #include <cmath>
27 #include <inttypes.h>
28 #include<zlib.h>
29 #include <boost/lexical_cast.hpp>
30 #include <boost/algorithm/string.hpp>
31 #include "gzstream.h"
33 using namespace std;
35 typedef long double rate_t;
36 rate_t PTi[4][10]; //store the prior probability for each substitutions
37 rate_t *PdTi; //store the distribution of error and quality
38 int Ref_Base_num;
39 int Cycle_num;
40 int Seq_Base_num;
41 int Quality_num;
42 int Qual_add_val;
43 int Cycle_add_val;
44 int Ref_add_val;
45 int Ref_Cyc_base_qual;
46 string Error_profile_file;
47 string Out_prefix = "output";
48 float Trans_Tranv_ratio = 2.0;
49 float Hom_SNP_rate = 0.0005;
50 float Het_SNP_rate = 0.0005;
51 int SNP_prior_mode = 1;
52 int Ascii_Qual_Start = 33;
53 int Only_output_snp = 1;
55 //由0123 4到ACGT N
56 char Bases[5] ={
57 'A', 'C', 'G', 'T', 'N'
60 //用0,1,2,3,分别代表A,C,G,T
61 int DipGeno[10][2] = {0,0, 1,1, 2,2, 3,3, 0,1, 0,2, 0,3, 1,2, 1,3, 2,3};
63 map<int,char> TwoBases;
64 map<int, int> TransSubs;
67 void define_TransSubs()
69 TransSubs[0] = 2; TransSubs[2] = 0;
70 TransSubs[1] = 3; TransSubs[3] = 1;
74 void define_two_bases_markers()
77 TwoBases[0] = 'A'; TwoBases[1] = 'C'; TwoBases[2] = 'G'; TwoBases[3] = 'T';
78 TwoBases[4] = 'M'; TwoBases[5] = 'R'; TwoBases[6] = 'W';
79 TwoBases[7] = 'S'; TwoBases[8] = 'Y'; TwoBases[9] = 'K';
83 void usage()
84 { cout << "calculate_diploid_genotype <*.mpileup>" << endl;
85 cout << " version 1.0" << endl;
86 cout << " -r <float> homozygous SNP prior rate, default=" << Hom_SNP_rate << endl;
87 cout << " -e <float> heterozygous SNP prior rate, default=" << Het_SNP_rate << endl;
88 cout << " -t <float> transition vs transversion prior ratio, default=" << Trans_Tranv_ratio << endl;
89 cout << " -d <int> set the prior mode, 0: no prior; 1: with prior; default="<< SNP_prior_mode << endl;
90 cout << " -m <file> matrix file of error and quality distributions, default=exe_path/XieSunney.blood.matrix.soapsnp" << endl;
91 cout << " -q <int> ASCII chracter standing for quality==0, default=" << Ascii_Qual_Start << endl;
92 cout << " -o <string> prefix for the output files" << endl;
93 cout << " -s <int> two output mode, 1, only snp; 0, cns; default=" << Only_output_snp << endl;
94 cout << " -h get help information" << endl;
95 exit(0);
99 //由ACGT N得到0123 4
100 int get_base (char refBase)
102 int refBaseId;
103 switch (refBase)
105 case 'A': refBaseId = 0; break;
106 case 'C': refBaseId = 1; break;
107 case 'G': refBaseId = 2; break;
108 case 'T': refBaseId = 3; break;
109 case 'a': refBaseId = 0; break;
110 case 'c': refBaseId = 1; break;
111 case 'g': refBaseId = 2; break;
112 case 't': refBaseId = 3; break;
113 default: refBaseId = 4;
115 return refBaseId;
119 //限定freq_rate在1到1e-9之间,统计样本量级1G
120 void load_soapsnp_error_profile (string &matrix_file)
122 igzstream infile;
123 infile.open(matrix_file.c_str());
124 if ( ! infile )
125 { cerr << "fail to open input file" << matrix_file << endl;
126 exit(0);
129 int pi = 0; //PdTi的循环变量
130 int line_field_number = 18;
131 vector<string> textlines;
133 string lineStr;
134 while (getline( infile, lineStr, '\n' ))
135 { if (lineStr[0] == '#')
136 { continue;
139 vector<string> lineVec;
140 boost::split(lineVec,lineStr, boost::is_any_of(" \t\n"), boost::token_compress_on);
141 if (lineVec.size() != line_field_number)
142 { continue;
145 Quality_num = boost::lexical_cast<int>(lineVec[0]) + 1;
146 Cycle_num = boost::lexical_cast<int>(lineVec[1]) + 1;
148 textlines.push_back(lineStr);
151 Ref_Base_num = 4;
152 Seq_Base_num = 4;
153 Ref_Cyc_base_qual = Ref_Base_num * Seq_Base_num * Quality_num * Cycle_num;
154 Qual_add_val = Ref_Base_num * Seq_Base_num * Cycle_num;
155 Cycle_add_val = Ref_Base_num * Seq_Base_num;
156 Ref_add_val = Seq_Base_num;
158 PdTi = new rate_t[Ref_Cyc_base_qual];
159 for (int j=0; j<textlines.size(); j++)
161 vector<string> lineVec;
162 boost::split(lineVec,textlines[j], boost::is_any_of(" \t\n"), boost::token_compress_on);
163 int this_qual = boost::lexical_cast<int>(lineVec[0]);
164 int this_cycl = boost::lexical_cast<int>(lineVec[1]);
166 for (int i=2; i<line_field_number; i++)
167 { int k = i - 2;
168 int this_ref = k / Seq_Base_num;
169 int this_base = k % Seq_Base_num;
170 pi = this_qual*Qual_add_val + this_cycl*Cycle_add_val + this_ref*Ref_add_val + this_base;
171 rate_t freq_rate = boost::lexical_cast<rate_t>(lineVec[i]);
173 if (pi < Ref_Cyc_base_qual)
174 { PdTi[pi] = (freq_rate > 1e-9L) ? freq_rate : 1e-9L;
175 }else{
176 cerr << "pi exceeds its range " << Ref_Cyc_base_qual << endl;
177 exit(0);
185 //根据Trans_Tranv_ratio,Hom_SNP_rate,Het_SNP_rate,SNP_prior_mode来计算genotype的prior probability
186 void assign_PTi_rates ()
188 double TsTvR = Trans_Tranv_ratio;
189 double NonSnpRate = 1 - Hom_SNP_rate - Het_SNP_rate;
190 double hom_trans_rate = Hom_SNP_rate * TsTvR*2 / (TsTvR*2 + 1 + 1);
191 double hom_tranv_rate = Hom_SNP_rate * 1 / (TsTvR*2 + 1 + 1);
193 double HapSnpRate = Hom_SNP_rate + Het_SNP_rate; //近似,非准确
194 double HapNonSnpRate = 1 - HapSnpRate;
195 double HapTransSnpRate = HapSnpRate * TsTvR*2 / (TsTvR*2 + 1 + 1);
196 double HapTranvSnpRate = HapSnpRate * 1 / (TsTvR*2 + 1 + 1);
198 double hetNonTransRate;
199 double hetNonTranvRate;
200 double hetTransTranvRate;
201 double hetTranvTranvRate;
202 double relativeTotal;
204 relativeTotal = HapNonSnpRate*HapTransSnpRate * 1 + HapNonSnpRate*HapTranvSnpRate *2 + HapTransSnpRate*HapTranvSnpRate * 2 + HapTranvSnpRate*HapTranvSnpRate * 1;
205 hetNonTransRate = (relativeTotal > 0) ? HapNonSnpRate*HapTransSnpRate / relativeTotal * Het_SNP_rate : 0;
206 hetNonTranvRate = (relativeTotal > 0) ? HapNonSnpRate*HapTranvSnpRate / relativeTotal * Het_SNP_rate : 0;
207 hetTransTranvRate = (relativeTotal > 0) ? HapTransSnpRate*HapTranvSnpRate / relativeTotal * Het_SNP_rate : 0;
208 hetTranvTranvRate = (relativeTotal > 0) ? HapTranvSnpRate*HapTranvSnpRate / relativeTotal * Het_SNP_rate : 0;
210 if (SNP_prior_mode == 0)
211 { NonSnpRate = hom_trans_rate = hom_tranv_rate = hetNonTransRate = hetNonTranvRate = hetTransTranvRate = hetTranvTranvRate = 0.1;
214 for (int i=0; i<4; i++)
215 { for (int j=0; j<10; j++)
216 { if (i == DipGeno[j][0] && i == DipGeno[j][1]) // non-SNP
218 PTi[i][j] = NonSnpRate;
219 }else if(i != DipGeno[j][0] && DipGeno[j][0] == DipGeno[j][1]) //homo SNP
221 PTi[i][j] = (TransSubs[i] == DipGeno[j][0]) ? hom_trans_rate : hom_tranv_rate ;
222 }else //het SNP
223 { if (i == DipGeno[j][0] || i == DipGeno[j][1]) //het-one
224 { PTi[i][j] = (TransSubs[i] == DipGeno[j][0] || TransSubs[i] == DipGeno[j][1]) ? hetNonTransRate : hetNonTranvRate;
225 }else //het-two
226 { PTi[i][j] = (TransSubs[i] != DipGeno[j][0] && TransSubs[i] != DipGeno[j][1]) ? hetTranvTranvRate : hetTransTranvRate;
234 //从pileup格式的base_str中得到干净的bases信息
235 void get_clean_bases(int refBase, string &raw_base_str,vector<int> &clean_base_vec)
237 int pos = 0;
238 int len = raw_base_str.size();
239 while (pos < len)
241 if (raw_base_str[pos] == '^')
242 { pos += 2;
244 else if (raw_base_str[pos] == '+' || raw_base_str[pos] == '-')
245 { pos += 1;
246 string indel_num;
247 while (pos < len)
248 { if (raw_base_str[pos] >= 48 && raw_base_str[pos] <= 57 ) //ASCII: 0-9
249 { indel_num.push_back(raw_base_str[pos]);
250 pos ++;
252 else
253 { break;
256 pos += atoi(indel_num.c_str());
258 else
260 switch (raw_base_str[pos])
262 case '.': clean_base_vec.push_back(refBase); break;
263 case ',': clean_base_vec.push_back(refBase); break;
264 case 'A': clean_base_vec.push_back(0); break;
265 case 'a': clean_base_vec.push_back(0); break;
266 case 'C': clean_base_vec.push_back(1); break;
267 case 'c': clean_base_vec.push_back(1); break;
268 case 'G': clean_base_vec.push_back(2); break;
269 case 'g': clean_base_vec.push_back(2); break;
270 case 'T': clean_base_vec.push_back(3); break;
271 case 't': clean_base_vec.push_back(3); break;
272 case 'N': clean_base_vec.push_back(4); break;
273 case 'n': clean_base_vec.push_back(4); break;
274 case '*': clean_base_vec.push_back(4); break;
275 case '>': clean_base_vec.push_back(4); break;
276 case '<': clean_base_vec.push_back(4); break;
277 //自动省略$符号的判断
279 pos += 1;
286 //从pileup格式的quality_str中得到quality信息,并转换成phred-scale, 33开始
287 void get_quality(vector<int> &qual_vec, string &qual_str)
289 for (int i=0; i<qual_str.size(); i++)
290 { int qual_val = qual_str[i] - Ascii_Qual_Start;
291 qual_val = (qual_val < Quality_num) ? qual_val : Quality_num - 1; //质量值大于40的全部当成40
292 qual_vec.push_back(qual_val);
296 //从pileup格式的cycle_str中得到cycle信息
297 void get_cycle (vector<int> &cycl_vec, string &cycl_str)
299 vector<string> temp_vec;
300 boost::split(temp_vec,cycl_str, boost::is_any_of(","), boost::token_compress_on);
301 for (int i=0; i<temp_vec.size(); i++)
302 { //int cycle_val = boost::lexical_cast<int>(temp_vec[i]); //runs much slow
303 int cycle_val = atoi(temp_vec[i].c_str()); //runs fast
304 if (cycle_val > Cycle_num)
305 { cycle_val = Cycle_num;
307 cycl_vec.push_back(cycle_val - 1); //cycle 0-99代表100个cycles
312 //根据指定的参考序列碱基,计算每一种genotype的后验概率P(Ti|D)
313 //coverage depth最大允许值为400,以防计算过程中数值指数部分超过long double界限
314 void calculte_genotype_probability(int refBase, vector<rate_t> &pTiDvec, vector<int> &baseVec, vector<int> &qualVec, vector<int> &cyclVec)
316 //cerr << "refbase: " << refBase << " " << Bases[refBase] << endl;
317 rate_t pD = 0.0;
318 int len = (baseVec.size() < 400) ? baseVec.size() : 400;
320 //vector<rate_t> pDTiVec;
321 for (int genotype=0; genotype<10; genotype++)
323 rate_t pTi = PTi[refBase][genotype]; //
324 rate_t pDTi = 1.0;
326 if (pTi != 0)
327 { for (int j=0; j<len; j++)
329 int num1 = qualVec[j]*Qual_add_val + cyclVec[j]*Cycle_add_val + DipGeno[genotype][0]*Ref_add_val + + baseVec[j];
330 int num2 = qualVec[j]*Qual_add_val + cyclVec[j]*Cycle_add_val + DipGeno[genotype][1]*Ref_add_val + + baseVec[j];
331 if (num1 >= Ref_Cyc_base_qual || num2 >= Ref_Cyc_base_qual)
332 { cerr << "num1 or num2 exceeds its range " << num1 << "\t" << num2 << endl;
333 exit(0);
335 pDTi *= (PdTi[num1] + PdTi[num2] ) / 2;
336 //cerr << "# " << DipGeno[genotype][0] << "\t" << DipGeno[genotype][1] << "\t" << cyclVec[j] << "\t" << baseVec[j] << "\t" << qualVec[j] << "\t"<< PdTi[num1] << "\t" << PdTi[num2] << "\t" << num1 << "\t" << num2 << endl;
340 //pDTiVec.push_back(pDTi);
341 pTiDvec.push_back( pTi*pDTi );
342 pD += pTi*pDTi;
343 //cerr << "calculate: " << genotype << "\t" << pTi << "\t" << pDTi << "\t" << pTi*pDTi << "\t" << pD << endl;
346 for (int genotype=0; genotype<10; genotype++)
347 { pTiDvec[genotype] = pTiDvec[genotype] / pD;
350 //output middle results
351 /*for (int genotype=0; genotype<10; genotype++)
352 { rate_t pTi = PTi[refBase][genotype];
353 rate_t pDTi = pDTiVec[genotype];
354 cout << genotype << "\t" << pTi << "\t" << pDTi << "\t" << pTi*pDTi << "\t" << pD << "\t" << pTiDvec[genotype] << endl;
358 //find the genotype with maximum posterior likelihood and calcualte the phred-scale score (up to 100)
359 void find_genotype_score(vector<rate_t> &pTiDvec, int &sample_genotype, int &phred_score)
361 int max_id = 0;
362 rate_t max_likelihood = 0.0;
363 for (int i=0; i<pTiDvec.size(); i++)
364 { if (pTiDvec[i] > max_likelihood)
365 { max_id = i;
366 max_likelihood = pTiDvec[i];
369 //cerr << "find max: " << i << " " << pTiDvec[i] << endl;
372 sample_genotype = max_id;
374 rate_t error_rate = 1 - pTiDvec[max_id];
375 if (error_rate < 1e-10L)
376 { error_rate = 1e-10L;
378 phred_score = int(-10 * log10(error_rate));
382 void output_prior_file(string &prior_file)
384 ofstream PRIOR (prior_file.c_str());
385 if (! PRIOR)
386 { cerr << "fail create prior file" << prior_file << endl;
389 PRIOR << "The 10 genotypes: \n";
390 for (int i=0; i<10; i++)
391 { PRIOR << i << "(" << Bases[DipGeno[i][0]] << Bases[DipGeno[i][1]] << ") ";
393 PRIOR << endl << endl;
396 PRIOR << "The compressed representaiton for each genotype:\n";
397 map<int,char>::const_iterator map_it = TwoBases.begin();
398 while (map_it != TwoBases.end())
399 { PRIOR << map_it->first << ":" << map_it->second << "; ";
400 ++map_it;
402 PRIOR << "\n\n";
404 PRIOR << "The transison substitution matrix:\n";
405 map<int, int>::const_iterator map_it2 = TransSubs.begin();
406 while (map_it2 != TransSubs.end())
407 { PRIOR << Bases[map_it2->first] << ":" << Bases[map_it2->second] << "; ";
408 ++map_it2;
410 PRIOR << endl << endl;
413 PRIOR << "Hom_SNP_rate: " << Hom_SNP_rate << endl;
414 PRIOR << "Het_SNP_rate: " << Het_SNP_rate << endl;
415 PRIOR << "All_SNP_rate: " << Hom_SNP_rate + Het_SNP_rate << endl;
416 PRIOR << "Trans_Tranv_ratio: " << Trans_Tranv_ratio << endl;
418 PRIOR << "\nThe prior probabilites of genotypes for each reference base:\n\n";
419 PRIOR << "R\\G";
420 for (int j=0; j<10; j++)
421 { PRIOR << "\t" << Bases[DipGeno[j][0]] << Bases[DipGeno[j][1]];
423 PRIOR << endl;
424 for (int i=0; i<4; i++)
425 { PRIOR << Bases[i] ;
426 for (int j=0; j<10; j++)
428 PRIOR << "\t" << PTi[i][j];
430 PRIOR << endl;
434 PRIOR << "\n Ref_Base_num " << Ref_Base_num << endl;
435 PRIOR << " Cycle_num " << Cycle_num << endl;
436 PRIOR << " Seq_Base_num " << Seq_Base_num << endl;
437 PRIOR << " Quality_num " << Quality_num << endl;
438 PRIOR << " PdTi array size: " << Ref_Cyc_base_qual << endl;
439 PRIOR << " Qual_add_val " << Qual_add_val << endl;
440 PRIOR << " Cycle_add_val " << Cycle_add_val << endl;
441 PRIOR << " Ref_add_val " << Ref_add_val << endl;
445 int main(int argc, char *argv[])
447 //get options from command line
448 int c;
449 while((c=getopt(argc, argv, "t:r:e:d:m:q:o:s:h")) !=-1) {
450 switch(c) {
451 case 't': Trans_Tranv_ratio=atof(optarg); break;
452 case 'r': Hom_SNP_rate=atof(optarg); break;
453 case 'e': Het_SNP_rate=atof(optarg); break;
454 case 'd': SNP_prior_mode=atoi(optarg); break;
455 case 'm': Error_profile_file=optarg; break;
456 case 'q': Ascii_Qual_Start=atoi(optarg); break;
457 case 'o': Out_prefix=optarg; break;
458 case 's': Only_output_snp=atoi(optarg); break;
459 case 'h': usage(); break;
460 default: usage();
463 if (argc < 2) usage();
465 if (! Error_profile_file.size())
466 { string exepath = argv[0];
467 int numh = exepath.rfind("/");
468 Error_profile_file = exepath.substr(0,numh+1) + "XieSunney.blood.matrix.soapsnp";
470 cerr << "\nUsed error profile matrix file: " << Error_profile_file << endl << endl;
472 string pileup_file = argv[optind++]; //optind, argv[optind++]顺序指向非option的参数
474 clock_t time_start, time_end;
475 time_start = clock();
477 //caluate the prior probability of each genotype
478 define_two_bases_markers();
479 define_TransSubs();
480 assign_PTi_rates();
481 cerr << "Assign prior probability to genotype done:\n\n";
483 //load the error and quality profile matrix into memory
484 load_soapsnp_error_profile(Error_profile_file);
485 cerr << "Dimensions of error profile:\n";
487 string prior_file = Out_prefix + ".prior";
488 output_prior_file(prior_file);
490 time_end = clock();
491 cerr << "\nLoad the error profile done\n";
492 cerr << "Run time: " << double(time_end - time_start) / CLOCKS_PER_SEC << endl;
494 //parse the pileup file, and calculate the likelihood of each genotypes
495 igzstream infile;
496 infile.open(pileup_file.c_str());
497 if ( ! infile )
498 { cerr << "fail to open input file" << pileup_file << endl;
501 string gentoytpe_file;
502 if (Only_output_snp == 1)
503 { gentoytpe_file = Out_prefix + ".snp";
504 }else
505 { gentoytpe_file = Out_prefix + ".cns";
507 ofstream outfile (gentoytpe_file.c_str());
508 if ( ! outfile )
509 { cerr << "fail to open output file" << gentoytpe_file << endl;
512 long int loop = 0;
513 outfile << "#Chr\tPos\tRef\tGeno\tScore\tDepth\tBases\tQuals\tCycles\t[all mapped bases]\n";
514 string lineStr;
515 while (getline( infile, lineStr, '\n' ))
517 vector<string> lineVec;
518 boost::split(lineVec,lineStr, boost::is_any_of("\t"), boost::token_compress_on);
519 int refBase = get_base(lineVec[2][0]);
521 vector<int> baseVec;
522 get_clean_bases(refBase,lineVec[4],baseVec);
524 vector<int> qualVec;
525 get_quality(qualVec, lineVec[5]);
527 vector<int> cyclVec;
528 get_cycle(cyclVec, lineVec[7]);
530 ////////////////debug///////////////
531 //cerr << "refBase: " << Bases[refBase] << endl;
532 //for (int i=0; i<baseVec.size(); i++)
533 //{ cerr << i+1 << "\t" << Bases[baseVec[i]] << "\t" << qualVec[i] << "\t" << cyclVec[i] << endl;
536 ////////////////debug///////////////
538 //check the result of parsing pileup file
539 if (baseVec.size() != qualVec.size() || baseVec.size() != cyclVec.size())
540 { cerr << "Error happens at : " << lineVec[4] << "\t" << lineVec[5] << "\t" << lineVec[7] << endl << endl;
541 exit(0);
544 vector<int> FbaseVec;
545 vector<int> FqualVec;
546 vector<int> FcyclVec;
548 //filter the observed data, base not N, and qual > 5;
549 for (int i=0; i<baseVec.size(); i++)
550 { if (baseVec[i] != 4 && qualVec[i] >= 5) //当质量值小于5的时候,P(D|G)几乎是相等的,因此P(G|D) = P(G), 数据没有发挥作用,结果将只反映先验概率,造成错误。
551 { FbaseVec.push_back(baseVec[i]);
552 FqualVec.push_back(qualVec[i]);
553 FcyclVec.push_back(cyclVec[i]);
556 if (FbaseVec.size() == 0)
557 { continue;
560 vector<rate_t> pTiDvec;
561 calculte_genotype_probability(refBase,pTiDvec, FbaseVec, FqualVec, FcyclVec);
562 int sample_genotype = 0;
563 int phred_score = 0;
564 find_genotype_score(pTiDvec,sample_genotype,phred_score);
565 if (Only_output_snp == 0 || sample_genotype != refBase)
568 outfile << lineVec[0] << "\t" << lineVec[1] << "\t" << lineVec[2] << "\t";
569 if (sample_genotype < 4)
570 { outfile << Bases[sample_genotype];
571 }else{
572 outfile << TwoBases[sample_genotype] << "(" << Bases[DipGeno[sample_genotype][0]] << Bases[DipGeno[sample_genotype][1]] << ")";
575 outfile << "\t" << phred_score << "\t" << FbaseVec.size() << "\t" ;
576 for (int i=0; i<FbaseVec.size(); i++)
577 { outfile << Bases[FbaseVec[i]];
579 outfile << "\t";
580 for (int i=0; i<FbaseVec.size(); i++)
581 { outfile << FqualVec[i] << ",";
583 outfile << "\t";
584 for (int i=0; i<FbaseVec.size(); i++)
585 { outfile << FcyclVec[i] << ",";
587 outfile << "\t" << baseVec.size() << "\t"<< lineVec[4] << "\t" << lineVec[5] << "\t" << lineVec[7] << endl;
591 //loop ++;
592 //if (loop > 10000)
593 //{ break;
597 infile.close();
598 outfile.close();
599 time_end = clock();
600 cerr << "\nParse the pileup file and genotype calculation done\n";
601 cerr << "Run time: " << double(time_end - time_start) / CLOCKS_PER_SEC << endl;