modified: SpatialOmicsCoord.py
[GalaxyCodeBases.git] / BGI / SOAPsnp / rank_sum.cc
blob249099d8cce1cae13dad2fda7a109349baca239f
1 #include "soap_snp.h"
3 int Prob_matrix::rank_table_gen() {
4 // When N <= 63, (so that n1<=31), use this table to test
5 ubit64_t i, n1, N, T1;
6 rate_t p_left, p_right;
8 // Calculate the factorials
9 double * fact = new double [64];
10 fact[0]=(double)1.0;
11 for(i=1;i!=64;i++) {
12 fact[i] = fact[i-1]*i;
15 ubit64_t * rank_sum= new ubit64_t [64*64*2048]; // 6bit: N; 5bit: n1; 11bit; T1
16 memset(rank_sum, 0, sizeof(ubit64_t)*64*64*2048);
17 rank_sum[0]=1;
18 for(N=1;N!=64;N++) {
19 //cerr<<N<<endl;
20 for(n1=0;n1<=N;n1++) {
21 //cerr<<n1<<endl;
22 for(T1=(1+n1)*n1/2;T1<=(N+N-n1+1)*n1/2;T1++) {
23 //cerr<<T1<<endl;
24 // Dynamic programming to generate the table
25 rank_sum[N<<17|n1<<11|T1] = rank_sum[((N-1)<<17)|(n1<<11)|T1] + ((T1>=N && n1>0) ? rank_sum[((N-1)<<17)|((n1-1)<<11)|(T1-N)]:0);
26 // Here, the p_rank is not cumulative
27 p_rank[(N<<17)|(n1<<11)|T1] = rank_sum[N<<17|n1<<11|T1] / (fact[N]/(fact[n1]*fact[N-n1]));
28 //cerr<<p_rank[(N<<17)|(n1<<11)|T1]<<endl;
30 p_left = 0.0, p_right =1.0;
31 for(T1=(1+n1)*n1/2;T1<=(N+N-n1+1)*n1/2;T1++) {
32 p_right = 1.0 - p_left;
33 p_left += p_rank[(N<<17)|(n1<<11)|T1];
34 p_rank[N<<17|n1<<11|T1] = (p_left<p_right?p_left:p_right);
35 //std::cerr<<N<<"\t"<<n1<<"\t"<<T1<<"\t"<<p_rank[(N<<17)|(n1<<11)|T1]<<endl;
39 delete [] rank_sum;
40 delete [] fact;
41 return 1;
44 double Call_win::normal_test(int n1, int n2, double T1, double T2) {
45 double u1, u2;
46 u1 = (T1 - n1*(n1+n2+1)/2) / sqrt(n1*n2*(n1+n2+1)/(double)12);
47 u2 = (T2 - n2*(n1+n2+1)/2) / sqrt(n1*n2*(n1+n2+1)/(double)12);
48 return normal_value(fabs(u1)>fabs(u2)?u1:u2);
51 double Call_win::table_test(double *p_rank, int n1, int n2, double T1, double T2) {
52 if(n1<=n2) {
53 return p_rank[(n1+n2)<<17|n1<<11|(int)(T1)]+(T1-(int)T1)*(p_rank[(n1+n2)<<16|n1<<11|(int)(T1+1)]-p_rank[(n1+n2)<<17|n1<<11|(int)(T1)]);
55 else {
56 return p_rank[(n1+n2)<<17|n2<<11|(int)(T2)]+(T2-(int)T2)*(p_rank[(n1+n2)<<16|n2<<11|(int)(T2+1)]-p_rank[(n1+n2)<<17|n2<<11|(int)(T2)]);
60 double Call_win::rank_test(Pos_info & info, char best_type, double * p_rank, Parameter * para) {
61 if( (best_type&3) == ((best_type>>2)&3) ) {
62 // HOM
63 return 1.0;
65 if( info.count_uni[best_type&3]==0 || info.count_uni[(best_type>>2)&3]==0) {
66 // HET with one allele...
67 return 0.0;
69 //cerr<<"RankSum:"<<info.pos<<endl;
70 //int * same_qual_count = new int [para->q_max-para->q_min+1];
71 //memset(same_qual_count, 0, sizeof(int)*(para->q_max-para->q_min+1));
72 //double * rank_array= new double [para->q_max-para->q_min+1];
73 //memset(rank_array, 0, sizeof(double)*(para->q_max-para->q_min+1));
74 int *same_qual_count = new int [64];
75 double *rank_array = new double [64];
76 memset(same_qual_count,0,sizeof(int)*64);
77 memset(rank_array,0,sizeof(double)*64);
79 int rank(0);
80 double T[4]={0.0, 0.0, 0.0, 0.0};
81 bool is_need[4] ={false,false,false,false};
82 is_need[(best_type&3)]=true; is_need[((best_type>>2)&3)]=true;
83 std::string::size_type o_base, strand;
84 int q_score, coord;
85 for(o_base=0;o_base!=4;o_base++) {
86 if(info.count_uni[o_base]==0 || !is_need[o_base]) continue;
87 for(q_score=para->q_max-para->q_min;q_score>=0;q_score--) {
88 for(coord=para->read_length-1;coord>=0;coord--) {
89 for(strand=0;strand<2;strand++) {
90 same_qual_count[q_score] += info.base_info[o_base<<15|strand<<14|q_score<<8|coord];
91 //if(info.pos==1256 && info.base_info[o_base<<13|strand<<12|q_score<<6|coord]!=0) {
92 // cerr<<info.pos<<"\t"<<q_score<<"\t"<<same_qual_count[q_score]<<"\t"<<int(info.base_info[o_base<<13|strand<<12|q_score<<6|coord])<<endl;
93 //}
98 rank = 0;
99 for(q_score=0;q_score<=(ubit64_t)(para->q_max-para->q_min+1);q_score++) {
100 rank_array[q_score]= rank+(1+same_qual_count[q_score])/2.0;
101 rank += same_qual_count[q_score];
103 for(o_base=0;o_base!=4;o_base++) {
104 if(info.count_uni[o_base]==0 || !is_need[o_base]) continue;
105 for(q_score=para->q_max-para->q_min;q_score>=0;q_score--) {
106 for(coord=para->read_length-1;coord>=0;coord--) {
107 for(strand=0;strand<2;strand++) {
108 //cerr<<"ACTG"[o_base]<<endl;
109 T[o_base] += (rank_array[q_score] * info.base_info[o_base<<15|strand<<14|q_score<<8|coord]);
110 //cerr<<T[o_base]<<endl;
115 delete [] same_qual_count;
116 delete [] rank_array;
117 //for(size_t a=0;a!=4;a++) {
118 // fprintf(stderr, "%lf\t", T[a]);
120 //fprintf(stderr, "\n");
121 //std::cerr<<"%d\t%d\t%lf\t%lf", info.count_uni[best_type&3], info.count_uni[(best_type>>2)&3], T[best_type&3], T[(best_type>>2)&3]);
122 if (info.count_uni[best_type&3]+info.count_uni[(best_type>>2)&3]<64) {
123 return table_test(p_rank, info.count_uni[best_type&3], info.count_uni[(best_type>>2)&3], T[best_type&3], T[(best_type>>2)&3]);
125 else {
126 return normal_test(info.count_uni[best_type&3], info.count_uni[(best_type>>2)&3],T[best_type&3], T[(best_type>>2)&3]);