limit fstBC to 30bp in Python3 ver.
[GalaxyCodeBases.git] / BGI / soap_src / soap_builder / BWTSearch.c
blobd983ff2496fad133f8b68e7c356feb2c10687f02
1 #include "BWTSearch.h"
3 unsigned int REVBWTForwardSearch(const unsigned char *convertedkey, const unsigned int keylength, const BWT *rev_bwt, unsigned int *resultsaindexleft, unsigned int *resultsaindexright, unsigned int *rev_resultsaindexleft, unsigned int *rev_resultsaindexright) {
5 unsigned int sacount=0;
6 unsigned int rev_startsaindex, rev_endsaindex;
7 unsigned int startsaindex, endsaindex;
8 unsigned int pos = 1;
9 int i;
10 unsigned int c = convertedkey[0];
11 unsigned int occcount_start[4];
12 unsigned int occcount_end[4];
13 unsigned int occcount[4];
15 rev_startsaindex = rev_bwt->cumulativeFreq[convertedkey[0]]+1;
16 rev_endsaindex = rev_bwt->cumulativeFreq[convertedkey[0]+1];
17 startsaindex = rev_bwt->cumulativeFreq[convertedkey[0]]+1;
18 endsaindex = rev_bwt->cumulativeFreq[convertedkey[0]+1];
20 while (pos < keylength && startsaindex <= endsaindex) {
21 c = convertedkey[pos];
23 BWTAllOccValue(rev_bwt,rev_startsaindex,occcount_start);
24 BWTAllOccValue(rev_bwt,rev_endsaindex + 1,occcount_end);
26 rev_startsaindex = rev_bwt->cumulativeFreq[c] + occcount_start[c] + 1;
27 rev_endsaindex = rev_bwt->cumulativeFreq[c] + occcount_end[c];
29 occcount[3]=0;
30 for (i=2;i>=0;i--) {
31 occcount[i]=occcount[i+1]+occcount_end[i+1]-occcount_start[i+1];
34 endsaindex = endsaindex - occcount[c];
35 startsaindex = endsaindex - (rev_endsaindex-rev_startsaindex);
36 pos++;
39 *resultsaindexleft = startsaindex;
40 *resultsaindexright = endsaindex;
41 *rev_resultsaindexleft = rev_startsaindex;
42 *rev_resultsaindexright = rev_endsaindex;
44 sacount+=endsaindex-startsaindex+1;
45 // number of occurrence = endsaindex - startsaindex + 1
46 return sacount;
51 unsigned int REVBWTContForwardSearch(const unsigned char *convertedkey, const unsigned int start, const unsigned int len, const BWT *rev_bwt, unsigned int *sal, unsigned int *sar, unsigned int *rev_sal, unsigned int *rev_sar) {
53 unsigned int sacount=0;
54 unsigned int pos = start;
55 unsigned char c;
56 unsigned int occcount_start[4];
57 unsigned int occcount_end[4];
58 unsigned int occcount[4];
59 int k;
60 while (pos < start+len && *sal <= *sar) {
61 c = convertedkey[pos];
63 BWTAllOccValue(rev_bwt,*rev_sal,occcount_start);
64 BWTAllOccValue(rev_bwt,*rev_sar + 1,occcount_end);
66 *rev_sal = rev_bwt->cumulativeFreq[c] + occcount_start[c] + 1;
67 *rev_sar = rev_bwt->cumulativeFreq[c] + occcount_end[c];
69 occcount[3]=0;
70 for (k=2;k>=0;k--) {
71 occcount[k]=occcount[k+1]+occcount_end[k+1]-occcount_start[k+1];
74 *sar = *sar - occcount[c];
75 *sal = *sar - (*rev_sar-*rev_sal);
77 pos++;
79 sacount+=*sar-*sal+1;
80 return sacount;
83 unsigned int BWTContBackwardSearch(const unsigned char *convertedkey, const unsigned int start, const unsigned int len, const BWT *bwt, unsigned int *sal, unsigned int *sar) {
85 unsigned int sacount=0;
86 unsigned int pos = len;
87 unsigned char c;
89 if (*sal > *sar) {
90 return 0;
93 while (pos > 0 && *sal <= *sar) {
94 c = convertedkey[pos-1];
95 *sal = bwt->cumulativeFreq[c] + BWTOccValue(bwt, *sal, c) + 1;
96 *sar = bwt->cumulativeFreq[c] + BWTOccValue(bwt, *sar + 1, c);
97 pos--;
99 sacount+=*sar-*sal+1;
100 return sacount;
103 unsigned int BWTBackward1Error(char *querypattern, int chain, BWT *bwt, unsigned int start, unsigned int len, unsigned int pl, unsigned int pr, unsigned int allele2, HitInfo *hits, unsigned int *numOfHits) {
104 unsigned int mk_l=1,mk_r=0;
105 unsigned int occcount_pstart[4];
106 unsigned int occcount_pend[4];
107 unsigned int occcount_start[4];
108 unsigned int occcount_end[4];
109 unsigned int occcount[4];
110 unsigned int sacount=0;
112 unsigned char c;
113 unsigned char ec;
114 int i;
115 unsigned int allele1;
116 //printf("bwtbackward1error %u %u\n",pl,pr);
117 // v--start v----start+len
118 // querypattern = xxxxxxxxxxxxxxxxx[xxxxxxxxxxxxxx]xxxxxx
119 // <------- search direction
120 // querypattern[start+len-1], querypattern[start+len-2]...querypattern[start]
121 // for i=0 to len-1,
122 // append querypatter[start+len-1-i]!
124 for (i=0;(i<len && pl<=pr);i++) {
125 //call once only proceduressssss - great
126 BWTAllOccValue(bwt,pl,occcount_pstart);
127 BWTAllOccValue(bwt,pr + 1,occcount_pend);
129 //backward manner
130 for (ec=0;ec<4;ec++) {
131 if (querypattern[start+len-1-i]==ec)
132 continue;
134 allele1= ((((start+len-1-i) & 0xfff)<<2) | (ec & 0x3));
135 mk_l=pl;
136 mk_r=pr;
138 unsigned int pos = i+1;
139 mk_l = bwt->cumulativeFreq[ec] + occcount_pstart[ec] + 1;
140 mk_r = bwt->cumulativeFreq[ec] + occcount_pend[ec];
142 if (BWTContBackwardSearch(querypattern,start,len-i-1,bwt,&mk_l,&mk_r)) {
143 // printf("%d\t", start+len-1-i);
144 // printf("%d\n", ec);
145 // printf("%d\t%d\n",allele1, allele2);
146 OCCProcess(mk_l,mk_r, chain, allele1, allele2, hits, numOfHits);
147 sacount+=mk_r-mk_l+1;
150 c = querypattern[start+len-1-i];
151 pl = bwt->cumulativeFreq[c] + occcount_pstart[c] + 1;
152 pr = bwt->cumulativeFreq[c] + occcount_pend[c];
155 return sacount;
159 unsigned int REVBWTForward1Error(char *querypattern, int chain, BWT * bwt,BWT * rev_bwt, unsigned int start,unsigned int len,unsigned int pl,unsigned int pr,unsigned int rev_pl,unsigned int rev_pr, unsigned int allele2, HitInfo *hits, unsigned int *numOfHits) {
161 unsigned int mk_l=1,mk_r=0,rev_mk_l,rev_mk_r;
162 unsigned int occcount_pstart[4];
163 unsigned int occcount_pend[4];
164 unsigned int occcountp[4];
165 unsigned int occcount_start[4];
166 unsigned int occcount_end[4];
167 unsigned int occcount[4];
168 unsigned int sacount=0;
170 unsigned char c;
171 unsigned char ec;
172 unsigned int i;
173 int k;
174 unsigned int allele1;
175 for (i=0;(i<len && pl<=pr);i++) {
176 //call once only proceduressssss - great
177 BWTAllOccValue(rev_bwt,rev_pl,occcount_pstart);
178 BWTAllOccValue(rev_bwt,rev_pr + 1,occcount_pend);
180 occcountp[3]=0;
181 for (k=2;k>=0;k--) {
182 occcountp[k]=occcountp[k+1]+occcount_pend[k+1]-occcount_pstart[k+1];
186 //forward manner
187 for (ec=0;ec<4;ec++) {
188 if (querypattern[start+i]==ec)
189 continue;
191 allele1= ((((start+i) & 0xfff)<<2) | (ec & 0x3));
192 mk_l=pl;
193 mk_r=pr;
194 rev_mk_l=rev_pl;
195 rev_mk_r=rev_pr;
197 unsigned int pos = i+1;
199 rev_mk_l = rev_bwt->cumulativeFreq[ec] + occcount_pstart[ec] + 1;
200 rev_mk_r = rev_bwt->cumulativeFreq[ec] + occcount_pend[ec];
202 mk_r = mk_r - occcountp[ec];
203 mk_l = mk_r - (rev_mk_r-rev_mk_l);
205 if (REVBWTContForwardSearch(querypattern,start+pos,len-i-1,rev_bwt,&mk_l,&mk_r,&rev_mk_l,&rev_mk_r)) {
206 // printf("%d\t", start+i);
207 // printf("%d\n", ec);
208 OCCProcess(mk_l,mk_r, chain, allele1, allele2, hits, numOfHits);
209 //return mk_l, mk_r
210 sacount+=mk_r-mk_l+1;
213 c = querypattern[start+i];
215 rev_pl = rev_bwt->cumulativeFreq[c] + occcount_pstart[c] + 1;
216 rev_pr = rev_bwt->cumulativeFreq[c] + occcount_pend[c];
218 pr = pr - occcountp[c];
219 pl = pr - (rev_pr-rev_pl);
221 return sacount;