modified: makefile
[GalaxyCodeBases.git] / tools / bwt / dcs-bwt / test / stringsort_test.cc
blob71f7059b647e727b4d8b6a325878d3c4d0eee3cb
1 // Copyright 2007 Google Inc.
3 // This program is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU General Public License
5 // as published by the Free Software Foundation; either version 2
6 // of the License, or (at your option) any later version.
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 // GNU General Public License for more details.
13 // You should have received a copy of the GNU General Public License
14 // along with this program; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 #include "../src/stringsort-inl.h"
18 #include "../src/stringsort.h"
19 #include "../src/ternary_partition.h" // only for random numbers
20 #include "../src/inttypes.h"
22 #include <iostream>
23 #include <string>
24 #include <functional>
25 #include <vector>
26 #include <algorithm>
27 #include <cassert>
29 namespace {
31 using dcsbwt::StringsortSuffixes;
32 using dcsbwt::ternary_partition::RandomNumberGenerator;
34 void PermuteRandomly(uint32* begin, uint32* end, RandomNumberGenerator* rng) {
35 int size = end - begin;
36 for (int i = 0; i < size; ++i) {
37 std::swap(begin[i], (begin+i)[rng->Uniform(size-i)]);
41 class SortedGroupVerifier {
42 public:
43 SortedGroupVerifier(const unsigned char* text, const unsigned char* text_end,
44 uint32* suffixes, uint32* suffixes_end,
45 int prefix_length)
46 : text_(text), text_length_(text_end - text),
47 prefix_length_(prefix_length),
48 suffixes_(suffixes),
49 suffixes_end_(suffixes_end),
50 previous_begin_(suffixes), previous_end_(suffixes),
51 report_count_(text_end - text + 1, 0) {}
52 void Reset() {
53 previous_begin_ = suffixes_;
54 previous_end_ = suffixes_;
55 std::fill(report_count_.begin(), report_count_.end(), 0);
57 void FinalCheck() {
58 assert(previous_end_ - suffixes_ == suffixes_end_ - suffixes_);
59 for (const uint32* it = suffixes_; it < suffixes_end_; ++it) {
60 assert(report_count_[*it] == 1);
63 void operator() (const uint32* begin, const uint32* end) {
64 assert(begin - suffixes_ == previous_end_ - suffixes_);
65 assert(suffixes_end_ - suffixes_ >= end - suffixes_);
66 const int size = end - begin;
67 assert(size > 0);
68 assert(*begin <= text_length_);
69 for (const uint32* it = begin; it < end; ++it) {
70 ++report_count_[*it];
72 // Check that all group suffixes share a prefix of length prefix_length_
73 if (size > 1) {
74 int full_length_suffix = text_length_ - prefix_length_;
75 assert(*begin <= full_length_suffix);
76 for (const uint32* it = begin + 1; it < end; ++it) {
77 assert(*it <= full_length_suffix);
78 for (int i = 0; i < prefix_length_; ++i) {
79 assert(text_[*begin + i] == text_[*it + i]);
83 // Check that this group's suffixes are larger than previous group's
84 // First check whether there is a previous group.
85 if (previous_begin_ < previous_end_) {
86 // Compare the first elements of the groups.
87 int limit = prefix_length_;
88 limit = std::min<int>(limit, text_length_ - *previous_begin_);
89 limit = std::min<int>(limit, text_length_ - *begin);
90 int i;
91 for (i = 0; i < limit; ++i)
92 if (text_[*previous_begin_ + i] != text_[*begin + i]) break;
93 assert(i < text_length_ - *begin);
94 if (i < limit) {
95 assert(text_[*previous_begin_ + i] < text_[*begin + i]);
96 } else if (i < text_length_ - *previous_begin_) {
97 // The two groups share a prefix of length prefix_length_.
98 // Now we must compare all-against-all all the way.
99 for (const uint32* it1 = previous_begin_; it1 < previous_end_; ++it1) {
100 for (const uint32* it2 = begin; it2 < end; ++it2) {
101 int length1 = text_length_ - *it1;
102 int length2 = text_length_ - *it2;
103 limit = std::min<int>(length1, length2);
104 for (i = 0; i < limit; ++i)
105 if (text_[*it1 + i] != text_[*it2 + i]) break;
106 assert(i < length2);
107 if (i < length1) {
108 assert(text_[*it1 + i] < text_[*it2 + i]);
114 previous_begin_ = begin;
115 previous_end_ = end;
117 private:
118 const unsigned char* const text_;
119 const int text_length_;
120 const int prefix_length_;
121 const uint32* const suffixes_;
122 const uint32* const suffixes_end_;
123 const uint32* previous_begin_;
124 const uint32* previous_end_;
125 std::vector<int> report_count_;
128 void SortAndVerify(const std::string& str,
129 uint32 target_prefix_length) {
130 int text_length = str.size();
131 std::string::const_iterator text = str.begin();
132 std::string::const_iterator text_end = text + text_length;
134 int num_suffixes = text_length + 1;
135 std::vector<uint32> suffix_array(2 * num_suffixes);
136 for (int i = 0; i < num_suffixes; ++i) suffix_array[i] = i;
137 uint32* suffix_area = &suffix_array[0];
138 uint32* suffixes_end = suffix_area + num_suffixes;
139 uint32* full_area_end = suffix_area + suffix_array.size();
140 uint32* small_area_end = suffixes_end + (full_area_end - suffixes_end)/4;
142 const unsigned char* text1
143 = reinterpret_cast<const unsigned char*>(str.data());
144 const unsigned char* text_end1 = text1 + text_length;
145 SortedGroupVerifier verifier(text1, text_end1, suffix_area, suffixes_end,
146 target_prefix_length);
147 RandomNumberGenerator rng;
148 // no workspace
149 PermuteRandomly(suffix_area, suffixes_end, &rng);
150 verifier.Reset();
151 uint32* result =
152 StringsortSuffixes(text, text_end,
153 suffix_area, suffix_area, suffixes_end,
154 0, target_prefix_length,
155 verifier);
156 assert(result == suffixes_end);
157 verifier.FinalCheck();
158 // some workpace
159 PermuteRandomly(suffix_area, suffixes_end, &rng);
160 verifier.Reset();
161 uint32* suffixes_in =
162 std::copy_backward(suffix_area, suffixes_end, small_area_end);
163 result =
164 StringsortSuffixes(text, text_end,
165 suffix_area, suffixes_in, small_area_end,
166 0, target_prefix_length,
167 verifier);
168 assert(result == suffixes_end);
169 verifier.FinalCheck();
170 // full workpace
171 PermuteRandomly(suffix_area, suffixes_end, &rng);
172 verifier.Reset();
173 suffixes_in =
174 std::copy_backward(suffix_area, suffixes_end, full_area_end);
175 result =
176 StringsortSuffixes(text, text_end,
177 suffix_area, suffixes_in, full_area_end,
178 0U, target_prefix_length,
179 verifier);
180 assert(result == suffixes_end);
181 verifier.FinalCheck();
184 void TestEmpty() {
185 std::string str;
186 SortAndVerify(str, 1);
187 std::cout << "TestEmpty passed" << std::endl;
190 void TestLengthOne() {
191 std::string str1("\0");
192 SortAndVerify(str1, 1);
193 std::string str2 = "\xFF";
194 SortAndVerify(str2, 1);
195 std::cout << "TestLengthOne passed" << std::endl;
198 void TestBanana() {
199 std::string str("banana");
200 SortAndVerify(str, 6);
201 SortAndVerify(str, 3);
202 std::cout << "TestBanana passed" << std::endl;
205 char const * const dna_512 =
206 "CACTGTGCCATCATCATCACCACCACTGTCATTATCACCACCACCATCATCACCAACACCACTG"
207 "CCATCGTCATCACCACCACTGTCATTATCACCACCACCATCACCAACATCACCACCACCATTAT"
208 "CACCACCATCAACACCACCACCCCCATCATCATCATCACTACTACCATCATTACCAGCACCACC"
209 "ACCACTATCACCACCACCACCACAATCACCATCACCACTATCATCAACATCATCACTACCACCA"
210 "TCACCAACACCACCATCATTATCACCACCACCACCATCACCAACATCACCACCATCATCATCAC"
211 "CACCATCACCAAGACCATCATCATCACCATCACCACCAACATCACCACCATCACCAACACCACC"
212 "ATCACCACCACCACCACCATCATCACCACCACCACCATCATCATCACCACCACCGCCATCATCA"
213 "TCGCCACCACCATGACCACCACCATCACAACCATCACCACCATCACAACCACCATCATCACTAT";
215 void TestDNA() {
216 std::string str(dna_512);
217 SortAndVerify(str, 512);
218 SortAndVerify(str, 3);
219 std::cout << "TestDNA passed" << std::endl;
222 char const * const aaa_256 =
223 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
224 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
225 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
226 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
228 void TestAAAA() {
229 std::string str(aaa_256);
230 SortAndVerify(str, 256);
231 SortAndVerify(str, 13);
232 std::cout << "TestAAAA passed" << std::endl;
236 void TestAAAB() {
237 std::string str("AAAAAAAAB");
238 SortAndVerify(str, 9);
239 std::cout << "TestAAAB passed" << std::endl;
242 void TestRandomLargeAlphabet() {
243 std::string str(1<<14, '\0');
244 RandomNumberGenerator rng;
245 for (std::string::iterator i = str.begin(); i != str.end(); ++i) {
246 *i = rng.Uniform(256);
248 SortAndVerify(str, 1<<14);
249 std::cout << "TestRandomLargeAlphabet passed" << std::endl;
252 void TestRandomSmallAlphabet() {
253 std::string str(1<<14, '\0');
254 RandomNumberGenerator rng;
255 for (std::string::iterator i = str.begin(); i != str.end(); ++i) {
256 *i = 65 + rng.Uniform(2);
258 SortAndVerify(str, 1<<14);
259 std::cout << "TestRandomSmallAlphabet passed" << std::endl;
262 void TestRandomLimited() {
263 std::string str(1<<14, '\0');
264 RandomNumberGenerator rng;
265 for (std::string::iterator i = str.begin(); i != str.end(); ++i) {
266 *i = 65 + rng.Uniform(2);
268 SortAndVerify(str, 2);
269 std::cout << "TestRandomLimited passed" << std::endl;
272 void TestNoSuffixes() {
273 std::string str(1<<14, '\0');
274 RandomNumberGenerator rng;
275 for (std::string::iterator i = str.begin(); i != str.end(); ++i) {
276 *i = 65 + rng.Uniform(2);
278 SortAndVerify(str, 0);
279 std::cout << "TestNoSuffixes passed" << std::endl;
282 } // namespace
284 int main(int argc, char **argv) {
286 TestEmpty();
287 TestLengthOne();
288 TestBanana();
289 TestDNA();
290 TestAAAA();
291 TestAAAB();
292 TestRandomLargeAlphabet();
293 TestRandomSmallAlphabet();
294 TestRandomLimited();
295 TestNoSuffixes();
297 return 0;