modified: makefile
[GalaxyCodeBases.git] / tools / bwt / dcs-bwt / test / prefix_doubling_test.cc
blob3217d8076b075b85e9abe8d1c65e7638d8ea3e4c
1 // Copyright 2007 Google Inc.
3 // This program is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU General Public License
5 // as published by the Free Software Foundation; either version 2
6 // of the License, or (at your option) any later version.
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 // GNU General Public License for more details.
13 // You should have received a copy of the GNU General Public License
14 // along with this program; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 #include <iostream>
18 #include <string>
19 #include <functional>
20 #include <vector>
21 #include <algorithm>
22 #include <cassert>
24 #include "../src/prefix_doubling-inl.h"
25 #include "../src/prefix_doubling.h"
27 namespace {
29 using dcsbwt::SortSuffixesByPrefixDoubling;
30 using dcsbwt::PrefixDoubler;
32 // This verification is based on Theorem 2 in:
34 // Burkhardt, Karkkainen:
35 // Fast lightweight suffix array construction and checking.
36 // Proc. 14th Symposium on Combinatorial Pattern Matching,
37 // LNCS 2676, Springer, 2003, pp. 55-69.
39 void Verify(std::string const& str, std::vector<int> const& rank_array,
40 std::vector<int> const& suffix_array) {
41 int str_length = str.length();
42 assert(suffix_array[0] == str_length);
43 assert(rank_array[str_length] == 0);
44 for (int rank = 1; rank <= str_length; ++rank) { // yes, I mean "<="
45 int suffix = suffix_array[rank];
46 assert(suffix >= 0);
47 assert(suffix < str_length);
48 assert(rank_array[suffix] == rank);
50 for (int rank = 1; rank < str_length; ++rank) { // yes, I mean "<"
51 int suffix1 = suffix_array[rank];
52 int suffix2 = suffix_array[rank + 1];
53 unsigned char first_char1 = static_cast<unsigned char>(str[suffix1]);
54 unsigned char first_char2 = static_cast<unsigned char>(str[suffix2]);
55 assert(first_char1 <= first_char2);
56 if (first_char1 == first_char2) {
57 assert(rank_array[suffix1 + 1] < rank_array[suffix2 + 1]);
62 void ConstructAndVerify(std::string const& str) {
63 std::vector<int> rank_array(str.size() + 1);
64 std::vector<int> suffix_array(str.size() + 1);
65 SortSuffixesByPrefixDoubling(str.begin(), str.end(),
66 rank_array.begin(), rank_array.end(),
67 suffix_array.begin(), suffix_array.end());
68 Verify(str, rank_array, suffix_array);
71 void TestEmpty() {
72 std::string str;
73 ConstructAndVerify(str);
74 std::cout << "TestEmpty passed" << std::endl;
78 void TestLengthOne() {
79 std::string str("\0");
80 ConstructAndVerify(str);
81 str[0] = '\xFF';
82 ConstructAndVerify(str);
83 std::cout << "TestLengthOne passed" << std::endl;
87 void TestBanana() {
88 std::string str("banana");
89 ConstructAndVerify(str);
90 std::cout << "TestBanana passed" << std::endl;
93 char const * const dna_512 =
94 "CACTGTGCCATCATCATCACCACCACTGTCATTATCACCACCACCATCATCACCAACACCACTG"
95 "CCATCGTCATCACCACCACTGTCATTATCACCACCACCATCACCAACATCACCACCACCATTAT"
96 "CACCACCATCAACACCACCACCCCCATCATCATCATCACTACTACCATCATTACCAGCACCACC"
97 "ACCACTATCACCACCACCACCACAATCACCATCACCACTATCATCAACATCATCACTACCACCA"
98 "TCACCAACACCACCATCATTATCACCACCACCACCATCACCAACATCACCACCATCATCATCAC"
99 "CACCATCACCAAGACCATCATCATCACCATCACCACCAACATCACCACCATCACCAACACCACC"
100 "ATCACCACCACCACCACCATCATCACCACCACCACCATCATCATCACCACCACCGCCATCATCA"
101 "TCGCCACCACCATGACCACCACCATCACAACCATCACCACCATCACAACCACCATCATCACTAT";
103 void TestDNA() {
104 std::string str(dna_512);
105 ConstructAndVerify(str);
106 std::cout << "TestDNA passed" << std::endl;
109 char const * const aaa_256 =
110 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
111 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
112 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
113 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
115 void TestAAAA() {
116 std::string str(aaa_256);
117 std::vector<int> rank_array(str.size() + 1);
118 std::vector<int> suffix_array(str.size() + 1);
119 SortSuffixesByPrefixDoubling(str.begin(), str.end(),
120 rank_array.begin(), rank_array.end(),
121 suffix_array.begin(), suffix_array.end());
122 Verify(str, rank_array, suffix_array);
123 std::vector<int>::iterator it =
124 std::adjacent_find(suffix_array.begin(), suffix_array.end(),
125 std::less_equal<int>());
126 assert(suffix_array.end() - it == 0);
127 std::cout << "TestAAAA passed" << std::endl;
130 void TestAAAB() {
131 std::string str("AAAAAAAAB");
132 std::vector<int> rank_array(str.size() + 1);
133 std::vector<int> suffix_array(str.size() + 1);
134 SortSuffixesByPrefixDoubling(str.begin(), str.end(),
135 rank_array.begin(), rank_array.end(),
136 suffix_array.begin(), suffix_array.end());
137 Verify(str, rank_array, suffix_array);
138 std::vector<int>::iterator it =
139 std::adjacent_find(suffix_array.begin() + 1, suffix_array.end(),
140 std::greater_equal<int>());
141 assert(suffix_array.end() - it == 0);
142 std::cout << "TestAAAB passed" << std::endl;
145 // Two strings with the same order of suffixes.
146 void TestSameOrder() {
147 std::string str1("AAAAAAAAB");
148 std::vector<int> rank_array(str1.size() + 1);
149 std::vector<int> suffix_array(str1.size() + 1);
150 SortSuffixesByPrefixDoubling(str1.begin(), str1.end(),
151 rank_array.begin(), rank_array.end(),
152 suffix_array.begin(), suffix_array.end());
153 std::string str2("XXXXXXXXY");
154 Verify(str2, rank_array, suffix_array);
155 std::cout << "TestSameOrder passed" << std::endl;
158 // Check that '\xFF' > 'A'
159 void TestAscii255() {
160 std::string str1("AAAAAAAAB");
161 std::vector<int> rank_array(str1.size() + 1);
162 std::vector<int> suffix_array(str1.size() + 1);
163 SortSuffixesByPrefixDoubling(str1.begin(), str1.end(),
164 rank_array.begin(), rank_array.end(),
165 suffix_array.begin(), suffix_array.end());
166 std::string str2("AAAAAAAA\xFF");
167 Verify(str2, rank_array, suffix_array);
168 std::cout << "TestAscii255 passed" << std::endl;
171 void TestComputeOneOrder() {
172 std::string str(dna_512);
173 std::vector<int> rank_array(str.size() + 1);
174 std::vector<int> suffix_array(str.size() + 1);
175 PrefixDoubler<int> pd(str.size());
176 pd.ComputeOneOrder(str.begin(), str.end(),
177 rank_array.begin(), rank_array.end(),
178 suffix_array.begin(), suffix_array.end());
179 pd.SortSuffixes(rank_array.begin(), rank_array.end(),
180 suffix_array.begin(), suffix_array.end(), 1);
181 pd.ComputeFinalSuffixArray(rank_array.begin(), rank_array.end(),
182 suffix_array.begin(), suffix_array.end());
183 Verify(str, rank_array, suffix_array);
184 std::cout << "TestComputeOneOrder passed" << std::endl;
187 void TestDouble() {
188 std::string str(dna_512);
189 std::vector<int> rank_array(str.size() + 1);
190 std::vector<int> suffix_array(str.size() + 1);
191 PrefixDoubler<int> pd(str.size());
192 pd.ComputeOneOrder(str.begin(), str.end(),
193 rank_array.begin(), rank_array.end(),
194 suffix_array.begin(), suffix_array.end());
195 int order_depth = pd.Double(rank_array.begin(), rank_array.end(),
196 suffix_array.begin(), suffix_array.end(), 1);
197 pd.SortSuffixes(rank_array.begin(), rank_array.end(),
198 suffix_array.begin(), suffix_array.end(), order_depth);
199 pd.ComputeFinalSuffixArray(rank_array.begin(), rank_array.end(),
200 suffix_array.begin(), suffix_array.end());
201 Verify(str, rank_array, suffix_array);
202 std::cout << "TestDouble passed" << std::endl;
205 void TestComputeInitialSuffixArray() {
206 std::string str(dna_512);
207 std::vector<int> rank_array(str.size() + 1);
208 std::vector<int> suffix_array(str.size() + 1);
209 PrefixDoubler<int> pd(str.size());
210 pd.ComputeOneOrder(str.begin(), str.end(),
211 rank_array.begin(), rank_array.end(),
212 suffix_array.begin(), suffix_array.end());
213 pd.ComputeInitialSuffixArray(rank_array.begin(), rank_array.end(),
214 suffix_array.begin(), suffix_array.end(), 1);
215 pd.SortSuffixes(rank_array.begin(), rank_array.end(),
216 suffix_array.begin(), suffix_array.end(), 1);
217 pd.ComputeFinalSuffixArray(rank_array.begin(), rank_array.end(),
218 suffix_array.begin(), suffix_array.end());
219 Verify(str, rank_array, suffix_array);
220 std::cout << "TestComputeInitialSuffixArray passed" << std::endl;
223 } // namespace
225 int main(int argc, char **argv) {
227 TestEmpty();
228 TestLengthOne();
229 TestBanana();
230 TestDNA();
231 TestAAAA();
232 TestAAAB();
233 TestSameOrder();
234 TestAscii255();
235 TestComputeOneOrder();
236 TestComputeInitialSuffixArray();
238 return 0;