3 2BWT-Sample.c SAMPLE CODE FOR 2BWT-LIB
5 Copyright (C) 2011, Edward Wu.
6 Website: http://www.cs.hku.hk/~mkewu/2bwt
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License
10 as published by the Free Software Foundation; either version 2
11 of the License, or (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
24 Change : Sample file to use 2BWT index.
26 This program performs the following with 2BWT library
27 1. BWT backward search on a given pattern
28 2. BWT forward search on a given pattern
29 3. BWT bi-directional search on a given pattern
30 3. BWT 1-mismatch search on a given pattern
31 4. Report the first occurrences of (1).
33 For complete guide on what the 2BWT index library provided
34 please check out the file 2BWT-Interface.h for the description
41 #include "2BWT-Interface.h"
46 //Variables for backward and forward search
47 unsigned long long l
,r
,rev_l
,rev_r
;
49 //Variables for search all sa ranges functions
50 unsigned long long result_l
[ALPHABET_SIZE
];
51 unsigned long long result_r
[ALPHABET_SIZE
];
52 unsigned long long result_rev_l
[ALPHABET_SIZE
];
53 unsigned long long result_rev_r
[ALPHABET_SIZE
];
55 //Variables for result
56 unsigned long long offset
;
57 unsigned int sequenceId
;
58 unsigned long long saCount
;
60 //Variables for pattern
62 strcpy(pattern
,"AACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAA");
63 int patternLength
= strlen(pattern
);
65 // Load up the index with the below statement
66 printf("Loading index ... ");
68 Idx2BWT
* idx2BWT
= BWTLoad2BWT("ncbi.genome.fa.index_64.a8",".sa4");
71 // Convert the pattern into 2BWT recognised coding scheme
72 BWTConvertPattern(idx2BWT
,pattern
,patternLength
,(unsigned char*) pattern
);
79 // The following performs a backward search of the pattern
80 // ===================================================================================
82 printf("Performing backward search of the pattern..\n");
83 BWTSARangeInitial(idx2BWT
,pattern
[patternLength
-1],&l
,&r
);
84 for (i
=patternLength
-2;i
>=0;i
--) {
85 BWTSARangeBackward(idx2BWT
,pattern
[i
],&l
,&r
);
87 printf("SA Range being = %llu %llu (%llu)\n\n",l
,r
,r
-l
+1);
89 // ===================================================================================
99 // The following performs a forward search of the pattern
100 // ===================================================================================
102 printf("Performing forward search of the pattern..\n");
103 BWTSARangeInitial(idx2BWT
,pattern
[0],&l
,&r
);
104 BWTSARangeInitial(idx2BWT
,pattern
[0],&rev_l
,&rev_r
);
105 for (i
=1;i
<patternLength
;i
++) {
106 BWTSARangeForward_Bidirection(idx2BWT
,pattern
[i
],&l
,&r
,&rev_l
,&rev_r
);
108 printf("SA Range being = %llu %llu %llu %llu (%llu)\n\n",l
,r
,rev_l
,rev_r
,r
-l
+1);
110 // ===================================================================================
113 // The following performs a bi-directional search of the pattern
114 // Starting from the middle of the pattern, first move right, then move left.
115 // ===================================================================================
117 printf("Performing bi-directional search of the pattern..\n");
118 j
= patternLength
/ 2;
119 BWTSARangeInitial(idx2BWT
,pattern
[j
],&l
,&r
);
120 BWTSARangeInitial(idx2BWT
,pattern
[j
],&rev_l
,&rev_r
);
121 for (i
=j
+1;i
<patternLength
;i
++) {
122 BWTSARangeForward_Bidirection(idx2BWT
,pattern
[i
],&l
,&r
,&rev_l
,&rev_r
);
124 for (i
=j
-1;i
>=0;i
--) {
125 BWTSARangeBackward_Bidirection(idx2BWT
,pattern
[i
],&l
,&r
,&rev_l
,&rev_r
);
127 printf("SA Range being = %llu %llu %llu %llu (%llu)\n\n",l
,r
,rev_l
,rev_r
,r
-l
+1);
129 // ===================================================================================
132 // The following performs a 1-mismatch search of the pattern
133 // ===================================================================================
136 printf("Performing 1-mismatch search of the pattern..\n");
138 j
= patternLength
/ 2;
139 BWTSARangeInitial(idx2BWT
,pattern
[patternLength
-1],&l
,&r
);
140 for (i
=patternLength
-2;i
>j
-1;i
--) { BWTSARangeBackward(idx2BWT
,pattern
[i
],&l
,&r
); }
142 for (i
=j
-1;i
>=0;i
--) {
143 BWTAllSARangesBackward(idx2BWT
,l
,r
,result_l
,result_r
);
144 for (c
=0;c
<ALPHABET_SIZE
;c
++) {
145 if (c
==pattern
[i
]) continue;
146 unsigned long long err_l
=result_l
[c
];
147 unsigned long long err_r
=result_r
[c
];
148 for (k
=i
-1;k
>=0;k
--) {
149 if (err_l
>err_r
) break;
150 BWTSARangeBackward(idx2BWT
,pattern
[k
],&err_l
,&err_r
);
152 if (err_l
<=err_r
&& k
<0) {
153 //An SA range of occurrence is found (err_l,err_r)
154 saCount
+=err_r
-err_l
+1;
157 l
=result_l
[pattern
[i
]];
158 r
=result_r
[pattern
[i
]];
161 BWTSARangeInitial(idx2BWT
,pattern
[0],&l
,&r
);
162 BWTSARangeInitial(idx2BWT
,pattern
[0],&rev_l
,&rev_r
);
163 for (i
=1;i
<j
;i
++) { BWTSARangeForward_Bidirection(idx2BWT
,pattern
[i
],&l
,&r
,&rev_l
,&rev_r
); }
164 for (i
=j
;i
<patternLength
;i
++) {
165 BWTAllSARangesForward_Bidirection(idx2BWT
,l
,r
,rev_l
,rev_r
,result_l
,result_r
,result_rev_l
,result_rev_r
);
166 for (c
=0;c
<ALPHABET_SIZE
;c
++) {
167 if (c
==pattern
[i
]) continue;
168 unsigned long long err_l
=result_l
[c
];
169 unsigned long long err_r
=result_r
[c
];
170 unsigned long long rev_err_l
=result_rev_l
[c
];
171 unsigned long long rev_err_r
=result_rev_r
[c
];
172 for (k
=i
+1;k
<patternLength
;k
++) {
173 if (err_l
>err_r
) break;
174 BWTSARangeForward_Bidirection(idx2BWT
,pattern
[k
],&err_l
,&err_r
,&rev_err_l
,&rev_err_r
);
176 if (err_l
<=err_r
&& k
>=patternLength
) {
177 //An SA range of occurrence is found (err_l,err_r)
178 saCount
+=err_r
-err_l
+1;
181 l
=result_l
[pattern
[i
]];
182 r
=result_r
[pattern
[i
]];
183 rev_l
=result_rev_l
[pattern
[i
]];
184 rev_r
=result_rev_r
[pattern
[i
]];
186 printf("%llu SA-indexes/occurrences were found.\n\n",saCount
);
189 // ===================================================================================
196 // The following output the first 5 position of the pattern
197 // ===================================================================================
201 printf("Reporting %d arbitrary occurrences..\n",j
);
203 BWTRetrievePositionFromSAIndex(idx2BWT
,l
+i
,&sequenceId
,&offset
);
204 printf("Occurrence found in sequence #%u with offset %llu\n",sequenceId
,offset
);
208 // ===================================================================================
215 // Free up the 2BWT index
216 printf("\nFree index ... ");
218 BWTFree2BWT(idx2BWT
);