4 int Call_win::initialize(ubit64_t start
) {
5 std::string::size_type i
;
6 for(i
=0; i
!= read_len
+ win_size
; i
++) {
7 sites
[i
].pos
= i
+start
;
11 //update 2010-12-20 by guyue
12 int Call_win::deep_init(ubit64_t start
) {
13 memset(sites
, 0, pos_size
* (read_len
+ win_size
));
15 for(i
=0; i
!= read_len
+ win_size
; i
++) {
16 sites
[i
].pos
= i
+start
;
19 //sites[i].repeat_time = 0;
20 //sites[i].dep_uni = 0;
21 //memset(sites[i].count_uni,0,sizeof(int)*4);
22 //memset(sites[i].q_sum,0,sizeof(int)*4);
24 //memset(sites[i].count_sfs,0,sizeof(int)*4);
25 //memset(sites[i].base_info,0,sizeof(small_int)*4*2*64*256);
26 //memset(sites[i].count_all,0,sizeof(int)*4);
30 //update 2010-12-20 by guyue
31 int Call_win::recycle()
33 std::string::size_type i
;
34 //for(i=0; i != read_len ; i++)
36 // sites[i].pos = sites[i+win_size].pos;
37 // sites[i].ori = sites[i+win_size].ori;
38 // sites[i].depth = sites[i+win_size].depth;
39 // sites[i].repeat_time = sites[i+win_size].repeat_time;
40 // sites[i].dep_uni = sites[i+win_size].dep_uni;
41 // memcpy(sites[i].base_info, sites[i+win_size].base_info, sizeof(small_int)*4*2*64*256); // 4 types of bases, 2 strands, max quality score is 64, and max read length 256
42 // memcpy(sites[i].count_uni, sites[i+win_size].count_uni, sizeof(int)*4);
43 // memcpy(sites[i].q_sum, sites[i+win_size].q_sum, sizeof(int)*4);
44 // memcpy(sites[i].count_all, sites[i+win_size].count_all, sizeof(int)*4);
46 // memcpy(sites[i].count_sfs, sites[i+win_size].count_sfs, sizeof(int)*4);
49 memcpy(&sites
[0], &sites
[win_size
], pos_size
*read_len
);
50 memset(&sites
[read_len
], 0, pos_size
* (win_size
));
51 for(i
=read_len
; i
!= read_len
+win_size
; i
++)
53 //sites[i].ori = 0xFF;
54 //sites[i].pos = sites[i-1].pos+1;
56 //sites[i].repeat_time = 0;
57 //sites[i].dep_uni = 0;
58 //memset(sites[i].count_uni,0,sizeof(int)*4);
59 //memset(sites[i].q_sum,0,sizeof(int)*4);
60 //memset(sites[i].base_info,0,sizeof(small_int)*4*2*64*256);
61 //memset(sites[i].count_all,0,sizeof(int)*4);
63 //memset(sites[i].count_sfs,0,sizeof(int)*4);
65 sites
[i
].pos
= sites
[i
-1].pos
+1;
71 int Call_win::call_cns(Chr_name call_name
, Chr_info
* call_chr
, ubit64_t call_length
, Prob_matrix
* mat
, Parameter
* para
, gzoutstream
* consensus
, my_ofstream
& baseinfo
, SfsMethod
&sfsMethod
, const int id
)
73 std::string::size_type coord
;
75 ubit64_t o_base
, strand
;
76 char allele1
, allele2
, genotype
, type
, type1
/*best genotype*/, type2
/*suboptimal genotype*/, base1
, base2
, base3
;
77 int i
, q_score
, q_adjusted
, qual1
, qual2
, qual3
, q_cns
, all_count1
, all_count2
, all_count3
;
79 //pcr_dep_count = new int [para->read_length*2];
80 double rank_sum_test_value
, binomial_test_value
;
82 // double * real_p_prior = new double [16];
83 // double * likelihoods = new double [10];
84 // memset(likelihoods, 0, sizeof(double)*10);
85 //int n_pcr_dep_count;//update by zhukai on 2010-12-27
87 //std::cerr<<"Call length="<<call_length<<endl;
88 for(std::string::size_type j
=0; j
!= call_length
; ++j
) //modify the j++ to ++j update by zhukai on 2010-12-21
90 //cerr<<sites[j].pos<<endl;
93 if (NULL
== call_chr
->get_region() )
97 else if (! call_chr
->is_in_region(sites
[j
].pos
))
106 sites
[j
].ori
= (call_chr
->get_bin_base(sites
[j
].pos
))&0xF;
107 if( ((sites
[j
].ori
&4) !=0)/*an N*/ && sites
[j
].depth
== 0)
108 { // CNS text format:
109 // ChrID\tPos\tRef\tCns\tQual\tBase1\tAvgQ1\tCountUni1\tCountAll1\tBase2\tAvgQ2\tCountUni2\tCountAll2\tDepth\tRank_sum\tCopyNum\tSNPstauts\n"
110 if(!para
->glf_format
&& ! para
->is_snp_only
)
112 // the output file was not opened,
113 if (!consensus
->is_open())
117 (*consensus
)<<call_name
<<'\t'<<(sites
[j
].pos
+1)<<"\tN\tN\t0\tN\t0\t0\t0\tN\t0\t0\t0\t0\t1.00000\t255.00000\t0"<<"\n";
119 else if (para
->glf_format
)
121 memset(likelihoods
, 0, sizeof(double)*10);
122 consensus
->write(reinterpret_cast<char*>(likelihoods
), sizeof(double)*10);
124 if(!consensus
->good())
126 cerr
<<"Broken ofstream after writting Position "<<(sites
[j
].pos
+1)<<" at "<<call_name
<<endl
;
129 baseinfo
<<call_name
<<'\t'<<(sites
[j
].pos
+1)<<"\tN\tN\t0\tN\t0\t0\t0\tN\t0\t0\t0\t0\t1.00000\t255.00000\t0"<<"\n";
137 base1
= 0, base2
= 0, base3
= 0;
138 qual1
= -1, qual2
= -2, qual3
= -3;
139 all_count1
= 0, all_count2
=0, all_count3
=0;
142 // This position is uniquely covered by at least one nucleotide
145 // i is four kind of alleles
146 if(sites
[j
].q_sum
[i
] >= qual1
)
153 qual1
= sites
[j
].q_sum
[i
];
155 else if (sites
[j
].q_sum
[i
]>=qual2
)
160 qual2
= sites
[j
].q_sum
[i
];
162 else if (sites
[j
].q_sum
[i
]>=qual3
)
165 qual3
= sites
[j
].q_sum
[i
];
174 // Adjust the best base so that things won't look ugly if the pos is not covered
175 base1
= (sites
[j
].ori
&7);
177 else if(qual2
==0 && base1
!= (sites
[j
].ori
&7))
179 base2
= (sites
[j
].ori
&7);
188 // This position is covered by all repeats
191 if(sites
[j
].count_all
[i
] >= all_count1
)
194 all_count3
= all_count2
;
196 all_count2
= all_count1
;
198 all_count1
= sites
[j
].count_all
[i
];
200 else if (sites
[j
].count_all
[i
]>=all_count2
)
203 all_count3
= all_count2
;
205 all_count2
= sites
[j
].count_all
[i
];
207 else if (sites
[j
].count_all
[i
]>=all_count3
)
210 all_count3
= sites
[j
].count_all
[i
];
219 base1
= (sites
[j
].ori
&7);
221 else if(all_count2
==0 && base1
!= (sites
[j
].ori
&7))
223 base2
= (sites
[j
].ori
&7);
230 // Calculate likelihood
231 for(genotype
=0;genotype
!=16; ++genotype
)
233 mat
->type_likely
[genotype
] = 0.0;
235 for(o_base
=0;o_base
!=4; ++o_base
)
237 //cerr<<o_base<<endl;
238 if(sites
[j
].count_uni
[o_base
]==0)
242 global_dep_count
= -1;
243 memset(pcr_dep_count
, 0, sizeof(int)*2*para
->read_length
);
245 for(q_score
=para
->q_max
-para
->q_min
; q_score
!= -1; --q_score
)
247 for(coord
=0; coord
!= para
->read_length
; ++coord
)
249 for(strand
=0; strand
!=2; ++strand
)
251 base_i
= sites
[j
].base_info
[o_base
<<15|strand
<<14|q_score
<<8|coord
]; //update by zhukai on 2010-12-28
252 for(k
=0; k
!= base_i
; ++k
)
254 if(pcr_dep_count
[strand
*para
->read_length
+coord
]==0)
256 global_dep_count
+= 1;
258 pcr_dep_count
[strand
*para
->read_length
+coord
] += 1;
259 q_adjusted
= int( pow(10, (log10(q_score
) +(pcr_dep_count
[strand
*para
->read_length
+coord
]-1)*para
->pcr_dependency
+global_dep_count
*para
->global_dependency
)) +0.5);
264 for(allele1
= 0;allele1
!= 4; ++allele1
)
266 for(allele2
= allele1
; allele2
!= 4; ++allele2
)
268 mat
->type_likely
[allele1
<<2|allele2
] += log10(0.5*mat
->p_matrix
[ ((ubit64_t
)q_adjusted
<<12) | (coord
<<4) | (allele1
<<2) | o_base
] +0.5*mat
->p_matrix
[((ubit64_t
)q_adjusted
<<12) | (coord
<<4) | (allele2
<<2) | o_base
]);
277 /* it is used to sfs, add by Bill*/
278 if (para
->sfs
!= NULL
)
280 sfsMethod
.getMapData(sites
[j
], mat
, call_name
, id
);
283 // the output file was not opened,
284 if (!consensus
->is_open())
291 for(int i
=0; i
!=10; ++i
)
293 consensus
->write(reinterpret_cast<char*>(&mat
->type_likely
[glf_type_code
[i
]]), sizeof(double));
296 if(!consensus
->good())
298 cerr
<<"Broken ofstream after writting Position "<<(sites
[j
].pos
+1)<<" at "<<call_name
<<endl
;
303 // Calculate Posterior Probability
304 memcpy(real_p_prior
, &mat
->p_prior
[((ubit64_t
)sites
[j
].ori
&0x7)<<4], sizeof(double)*16);
305 if ( (sites
[j
].ori
& 0x8) && para
->refine_mode
)
308 snp_p_prior_gen(real_p_prior
, call_chr
->find_snp(sites
[j
].pos
), para
, sites
[j
].ori
);
310 memset(mat
->type_prob
,0,sizeof(rate_t
)*17);
312 for (allele1
=0; allele1
!=4; ++allele1
)
314 for (allele2
=allele1
; allele2
!=4; ++allele2
)
316 genotype
= allele1
<<2|allele2
;
317 if (para
->is_monoploid
&& allele1
!= allele2
)
321 mat
->type_prob
[genotype
] = mat
->type_likely
[genotype
] + log10(real_p_prior
[genotype
]) ;
323 if (mat
->type_prob
[genotype
] >= mat
->type_prob
[type1
] || type1
== 16)
325 // find the genotype which type_prob is biggest.
329 else if (mat
->type_prob
[genotype
] >= mat
->type_prob
[type2
] || type2
==16)
331 // find the genotype which type_prob is second biggest.
340 is_out
= true; // Check if the position needs to be output, useful in snp-only mode
342 if (para
->rank_sum_mode
)
344 rank_sum_test_value
= rank_test(sites
[j
], type1
, mat
->p_rank
, para
);
348 rank_sum_test_value
= 1.0;
350 if(rank_sum_test_value
==0.0)
352 // avoid double genotype overflow
357 q_cns
= (int)(10*(mat
->type_prob
[type1
]-mat
->type_prob
[type2
])+10*log10(rank_sum_test_value
));
360 if ( (type1
&3) == ((type1
>>2)&3))
361 { // Called Homozygous
362 if (qual1
>0 && base1
!= (type1
&3))
364 //Wired: best base is not the consensus!
367 else if (/*qual2>0 &&*/ q_cns
> qual1
-qual2
)
369 // Should not bigger than this
378 { // Called Heterozygous
379 if(sites
[j
].q_sum
[base1
]>0 && sites
[j
].q_sum
[base2
]>0 && type1
== (base1
<base2
? (base1
<<2|base2
):(base2
<<2|base1
)))
381 // The best bases are in the heterozygote
382 if (q_cns
> qual2
-qual3
)
388 { // Ok, wired things happened
400 if(! para
->glf_format
)
402 // ChrID\tPos\tRef\tCns\tQual\tBase1\tAvgQ1\tCountUni1\tCountAll1\tBase2\tAvgQ2\tCountUni2\tCountAll2\tDepth\tRank_sum\tCopyNum\tSNPstauts\n"
403 if(! para
->is_snp_only
|| (abbv
[type1
] != "ACTGNNNN"[(sites
[j
].ori
&0x7)] && sites
[j
].depth
> 0))
405 if(base1
<4 && base2
<4)
407 (*consensus
)<<call_name
<<'\t'<<(sites
[j
].pos
+1)<<'\t'<<("ACTGNNNN"[(sites
[j
].ori
&0x7)])<<'\t'<<abbv
[type1
]<<'\t'<<q_cns
<<'\t'
408 <<("ACTGNNNN"[base1
])<<'\t'<<(sites
[j
].q_sum
[base1
]==0?0:sites
[j
].q_sum
[base1
]/sites
[j
].count_uni
[base1
])<<'\t'<<sites
[j
].count_uni
[base1
]<<'\t'<<sites
[j
].count_all
[base1
]<<'\t'
409 <<("ACTGNNNN"[base2
])<<'\t'<<(sites
[j
].q_sum
[base2
]==0?0:sites
[j
].q_sum
[base2
]/sites
[j
].count_uni
[base2
])<<'\t'<<sites
[j
].count_uni
[base2
]<<'\t'<<sites
[j
].count_all
[base2
]<<'\t'
410 <<sites
[j
].depth
<<'\t'<<rank_sum_test_value
<<'\t'<<(sites
[j
].depth
==0?255:(double)(sites
[j
].repeat_time
)/sites
[j
].depth
)<<'\t'<<((sites
[j
].ori
&8)?1:0)<<"\n";
414 (*consensus
)<<call_name
<<'\t'<<(sites
[j
].pos
+1)<<'\t'<<("ACTGNNNN"[(sites
[j
].ori
&0x7)])<<'\t'<<abbv
[type1
]<<'\t'<<q_cns
<<'\t'
415 <<("ACTGNNNN"[base1
])<<'\t'<<(sites
[j
].q_sum
[base1
]==0?0:sites
[j
].q_sum
[base1
]/sites
[j
].count_uni
[base1
])<<'\t'<<sites
[j
].count_uni
[base1
]<<'\t'<<sites
[j
].count_all
[base1
]<<'\t'
417 <<sites
[j
].depth
<<'\t'<<rank_sum_test_value
<<'\t'<<(sites
[j
].depth
==0?255:(double)(sites
[j
].repeat_time
)/sites
[j
].depth
)<<'\t'<<((sites
[j
].ori
&8)?1:0)<<"\n";
421 (*consensus
)<<call_name
<<'\t'<<(sites
[j
].pos
+1)<<"\tN\tN\t0\tN\t0\t0\t0\tN\t0\t0\t0\t0\t1.000\t255.000\t0"<<"\n";
427 if(base1
<4 && base2
<4)
429 baseinfo
<<call_name
<<'\t'<<(sites
[j
].pos
+1)<<'\t'<<("ACTGNNNN"[(sites
[j
].ori
&0x7)])<<'\t'<<abbv
[type1
]<<'\t'<<q_cns
<<'\t'
430 <<("ACTGNNNN"[base1
])<<'\t'<<(sites
[j
].q_sum
[base1
]==0?0:sites
[j
].q_sum
[base1
]/sites
[j
].count_uni
[base1
])<<'\t'<<sites
[j
].count_uni
[base1
]<<'\t'<<sites
[j
].count_all
[base1
]<<'\t'
431 <<("ACTGNNNN"[base2
])<<'\t'<<(sites
[j
].q_sum
[base2
]==0?0:sites
[j
].q_sum
[base2
]/sites
[j
].count_uni
[base2
])<<'\t'<<sites
[j
].count_uni
[base2
]<<'\t'<<sites
[j
].count_all
[base2
]<<'\t'
432 <<sites
[j
].depth
<<'\t'<<showpoint
<<rank_sum_test_value
<<'\t'<<(sites
[j
].depth
==0?255:(double)(sites
[j
].repeat_time
)/sites
[j
].depth
)<<'\t'<<((sites
[j
].ori
&8)?1:0)<<"\n";
436 baseinfo
<<call_name
<<'\t'<<(sites
[j
].pos
+1)<<'\t'<<("ACTGNNNN"[(sites
[j
].ori
&0x7)])<<'\t'<<abbv
[type1
]<<'\t'<<q_cns
<<'\t'
437 <<("ACTGNNNN"[base1
])<<'\t'<<(sites
[j
].q_sum
[base1
]==0?0:sites
[j
].q_sum
[base1
]/sites
[j
].count_uni
[base1
])<<'\t'<<sites
[j
].count_uni
[base1
]<<'\t'<<sites
[j
].count_all
[base1
]<<'\t'
439 <<sites
[j
].depth
<<'\t'<<showpoint
<<rank_sum_test_value
<<'\t'<<(sites
[j
].depth
==0?255:(double)(sites
[j
].repeat_time
)/sites
[j
].depth
)<<'\t'<<((sites
[j
].ori
&8)?1:0)<<"\n";
443 baseinfo
<<call_name
<<'\t'<<(sites
[j
].pos
+1)<<"\tN\tN\t0\tN\t0\t0\t0\tN\t0\t0\t0\t0\t1.000\t255.000\t0"<<"\n";
446 //if(sites[j].pos == 250949) {
450 // delete [] real_p_prior;
451 // delete [] pcr_dep_count;
452 // delete [] likelihoods;
456 //update 2010-11-08 guyue
459 * FUNCTION: call the consensus from soap file .
463 int Call_win::soap2cns(igzstream
& alignment
, gzoutstream
* consensus
, my_ofstream
& baseinfo
, Genome
* genome
, Prob_matrix
* mat
, Parameter
* para
, SfsMethod
&sfsMethod
, const int id
)
466 current_chr
= genome
->chromosomes
.end();
467 for(std::string line
; getline(alignment
, line
);)
469 std::istringstream
ss(line
);
471 deal_read(soap
, consensus
, baseinfo
, genome
, mat
, para
, sfsMethod
, id
);
474 deal_tail(consensus
, baseinfo
, genome
, mat
, para
, sfsMethod
, id
);
482 /* program by Bill */
483 int Call_win::soap2cns(SamCtrl
&alignment
, gzoutstream
* consensus
, my_ofstream
& baseinfo
, Genome
* genome
, Prob_matrix
* mat
, Parameter
* para
, SfsMethod
&sfsMethod
, const int id
) {
485 current_chr
= genome
->chromosomes
.end();
488 while((r
= alignment
.readline(line
)) >=0) {
489 line
= alignment_format(line
); //from sam/bam to soap file
491 if (line
== NOUSE_ALIGNMENT
)
493 std::istringstream
ss(line
);
495 deal_read(soap
, consensus
, baseinfo
, genome
, mat
, para
, sfsMethod
, id
);
498 deal_tail(consensus
, baseinfo
, genome
, mat
, para
, sfsMethod
, id
);
507 * FUNCTION: deal with the win.
508 * PARAMETER: current_chr: is the chr to be processed. last_start: the last position that be processed
509 * consensus: output file handle. genome: is the point to the Genome.
510 * mat: the matrix. para: the parameter.
511 * sfsMethod: a SfsMethod object, use to sfs.
512 * id: the individual's id, use to sfs
515 void Call_win::pro_win(gzoutstream
* consensus
, my_ofstream
& baseinfo
, Genome
* genome
, Prob_matrix
* mat
, Parameter
*para
, SfsMethod
&sfsMethod
, const int id
)
517 //done_pro_win = true; // 2011-1-20
519 if (current_chr
== genome
->chromosomes
.end())
524 if (!para
->region_only
|| current_chr
->second
->is_in_region_win(last_start
))
526 //cerr<<"at"<<soap.get_pos()<<"Called"<<last_start<<endl;
527 if(last_start
> sites
[win_size
-1].pos
)
529 initialize((last_start
/win_size
)*win_size
);
531 call_cns(current_chr
->first
, current_chr
->second
, win_size
, mat
, para
, consensus
, baseinfo
, sfsMethod
, id
);
532 last_start
= sites
[win_size
-1].pos
; //last start is the end position of the process window
536 if (!para
->region_only
)
538 //cerr<<"recycled"<<last_start<<endl;
541 last_start
= sites
[win_size
-1].pos
;
545 //cerr<<"recycled"<<" "<<last_start<<endl;
546 /* if (last_start>(sites[win_size-1].pos)) {
547 cerr<<"Unexpected "<<last_start<<">"<<(sites[win_size-1].pos)<<endl;
549 if ((last_start+1)%win_size!=0) {
550 cerr<<"Assertion Error!"<<last_start<<">"<<sites[win_size-1].pos<<endl;
552 //cerr << last_satrt <<" "<<sites[win_size-1].pos<<endl;
554 initialize(last_start-win_size+1);
557 if (current_chr
->second
->is_in_region_win(sites
[win_size
].pos
))
563 deep_init(sites
[win_size
].pos
);
567 last_start
= sites
[win_size
-1].pos
;
571 assert((last_start
+1)%win_size
==0);
572 last_start
+= win_size
;
578 * FUNCTION: deal with one read.
579 * PARAMETER: recycled: the flag.
580 * soap: the soapformat to be processed. soap: SOAP format read from the file.
581 * consensus: output file handle. genome: is the point to the Genome.
582 * mat: the matrix. para: the parameter.
583 * sfsMethod: a SfsMethod object, use to sfs.
584 * id: the individual's id, use to sfs
587 void Call_win::deal_read(Soap_format
&soap
, gzoutstream
* consensus
, my_ofstream
& baseinfo
, Genome
* genome
, Prob_matrix
* mat
, Parameter
* para
, SfsMethod
&sfsMethod
, const int id
)
590 //cerr<<"A"<<soap.get_pos()<<endl;
592 if(soap
.get_pos() < 0)
596 // first time deal_read or come to a new chromose
597 if (current_chr
== genome
->chromosomes
.end() || current_chr
->first
!= soap
.get_chr_name())
599 //come to a new chromose
600 if(current_chr
!= genome
->chromosomes
.end())
603 while(current_chr
->second
->length() > sites
[win_size
-1].pos
&& current_chr
->second
->length() > last_start
&& (current_chr
->second
->get_region() != NULL
|| current_chr
->second
->m_region_len
> last_start
))
605 pro_win(consensus
, baseinfo
, genome
, mat
, para
, sfsMethod
, id
);
609 if(last_start
> sites
[win_size
-1].pos
)
611 initialize((last_start
/win_size
)*win_size
);
613 call_cns(current_chr
->first
, current_chr
->second
, current_chr
->second
->length()%win_size
, mat
, para
, consensus
, baseinfo
, sfsMethod
, id
);
617 current_chr
= genome
->chromosomes
.find(soap
.get_chr_name());
619 /*if (last_start < current_chr->second->getStartPos())
620 last_start = current_chr->second->getStartPos();*/
621 last_start
= current_chr
->second
->getStartPos();
622 //initialize((last_start/win_size)*win_size);
623 // use deep_init to make sites to be pure. 2010-12-16 Bill.
624 deep_init((last_start
/win_size
)*win_size
);
625 cerr
<<"Processing "<<current_chr
->first
<<endl
;
631 //the end of this read is out of the chromorese
632 if (soap
.get_pos()+soap
.get_read_len() >= current_chr
->second
->length()) {
636 //is out of targ region
637 if (para
->region_only
&& (current_chr
->second
->get_region() == NULL
|| !current_chr
->second
->is_in_region(soap
.get_pos()))) {
641 if(soap
.get_pos() < last_start
)
643 //cerr << soap.get_pos() << "<" <<last_start<<endl;
645 /*cerr<<"Errors in sorting:"<<soap.get_pos()<<"<"<<last_start<<endl;
650 //some window before the start of this soap isn't process
651 while (soap
.get_pos()/win_size
> last_start
/win_size
)
653 //if ( soap.get_pos()/win_size - last_start/win_size >1)
654 //cerr << soap.get_pos()/win_size <<" "<<last_start/win_size<<endl;
655 pro_win(consensus
, baseinfo
, genome
, mat
, para
, sfsMethod
, id
); //deal the pre-window
661 last_start
=soap
.get_pos();
662 static pthread_mutex_t mutex
= PTHREAD_MUTEX_INITIALIZER
;
664 for(coord
=0; coord
<soap
.get_read_len(); coord
++){
665 if( (soap
.get_pos()+coord
)/win_size
== soap
.get_pos()/win_size
) {
666 // In the same sliding window
667 sub
= (soap
.get_pos()+coord
) % win_size
;
670 sub
= (soap
.get_pos()+coord
) % win_size
+ win_size
; // Use the tail to store the info so that it won't intervene the uncalled bases
673 if (sub
>= (win_size
+ para
->read_length
))
675 // update at 2010-10-12
676 cerr
<< "\tWARNING: the parameter '-L' was wrong, you should set the longest read's length!!!"
677 << " At position : " << soap
.get_pos()
679 break; // we don't need to exit the program. juet beak;
683 sites
[sub
].depth
+= 1;
684 sites
[sub
].repeat_time
+= soap
.get_hit();
685 if((soap
.is_N(coord
)) || soap
.get_qual(coord
)<para
->q_min
|| sites
[sub
].dep_uni
>= 0xFF) {
686 // An N, low quality or meaningless huge depth
689 //just deal with the position best hit equal to 1
690 if(soap
.get_hit() == 1) {
691 //exit((fprintf(stderr, "Wo Cao!\n")));
692 sites
[sub
].dep_uni
+= 1;
693 // Update the covering info: 4x2x64x64 matrix, base x strand x q_score x read_pos, 2-1-6-6 bits for each
695 // Binary strand: 0 for plus and 1 for minus
696 sites
[sub
].base_info
[(((ubit64_t
)(soap
.get_base(coord
)&0x6)|0))<<14 | ((ubit64_t
)(soap
.get_qual(coord
)-para
->q_min
))<<8 | coord
] += 1;
699 sites
[sub
].base_info
[(((ubit64_t
)(soap
.get_base(coord
)&0x6)|1))<<14 | ((ubit64_t
)(soap
.get_qual(coord
)-para
->q_min
))<<8 | (soap
.get_read_len()-1-coord
) ] += 1;
702 sites
[sub
].count_uni
[(soap
.get_base(coord
)>>1)&3] += 1;
703 sites
[sub
].q_sum
[(soap
.get_base(coord
)>>1)&3] += (soap
.get_qual(coord
)-para
->q_min
);
705 // add by Bill at 2010-10-15
706 if (para
->sfs
!= NULL
&& soap
.get_qual(coord
) > para
->sfs
->qs
)
708 sites
[sub
].count_sfs
[(soap
.get_base(coord
)>>1)&3] += 1;
714 sites
[sub
].count_all
[(soap
.get_base(coord
)>>1)&3] += 1;
719 // processed tail of chromosome
722 * FUNCTION: processed tail of chromosome.
723 * PARAMETER: recycled: the flag. consensus: output file handle. baseinfo: output file handle.
724 * genome: is the point to the Genome.
725 * mat: the matrix. para: the parameter.
726 * sfsMethod: a SfsMethod object, use to sfs.
727 * id: the individual's id, use to sfs
730 void Call_win::deal_tail(gzoutstream
* consensus
, my_ofstream
& baseinfo
, Genome
* genome
, Prob_matrix
* mat
, Parameter
* para
, SfsMethod
&sfsMethod
, const int id
)
732 //The unprocessed tail of chromosome
734 while(current_chr
->second
->length() > sites
[win_size
-1].pos
&& current_chr
->second
->length() > last_start
&& (current_chr
->second
->get_region() != NULL
|| current_chr
->second
->m_region_len
> last_start
)) {
736 pro_win(consensus
, baseinfo
, genome
, mat
, para
, sfsMethod
, id
);
740 if(last_start
> sites
[win_size
-1].pos
) {
741 initialize((last_start
/win_size
)*win_size
);
743 call_cns(current_chr
->first
, current_chr
->second
, current_chr
->second
->length()%win_size
, mat
, para
, consensus
, baseinfo
, sfsMethod
, id
);