1 // Copyright 2007 Google Inc.
3 // This program is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU General Public License
5 // as published by the Free Software Foundation; either version 2
6 // of the License, or (at your option) any later version.
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 // GNU General Public License for more details.
13 // You should have received a copy of the GNU General Public License
14 // along with this program; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
24 #include "../src/prefix_doubling-inl.h"
25 #include "../src/prefix_doubling.h"
29 using dcsbwt::SortSuffixesByPrefixDoubling
;
30 using dcsbwt::PrefixDoubler
;
32 // This verification is based on Theorem 2 in:
34 // Burkhardt, Karkkainen:
35 // Fast lightweight suffix array construction and checking.
36 // Proc. 14th Symposium on Combinatorial Pattern Matching,
37 // LNCS 2676, Springer, 2003, pp. 55-69.
39 void Verify(std::string
const& str
, std::vector
<int> const& rank_array
,
40 std::vector
<int> const& suffix_array
) {
41 int str_length
= str
.length();
42 assert(suffix_array
[0] == str_length
);
43 assert(rank_array
[str_length
] == 0);
44 for (int rank
= 1; rank
<= str_length
; ++rank
) { // yes, I mean "<="
45 int suffix
= suffix_array
[rank
];
47 assert(suffix
< str_length
);
48 assert(rank_array
[suffix
] == rank
);
50 for (int rank
= 1; rank
< str_length
; ++rank
) { // yes, I mean "<"
51 int suffix1
= suffix_array
[rank
];
52 int suffix2
= suffix_array
[rank
+ 1];
53 unsigned char first_char1
= static_cast<unsigned char>(str
[suffix1
]);
54 unsigned char first_char2
= static_cast<unsigned char>(str
[suffix2
]);
55 assert(first_char1
<= first_char2
);
56 if (first_char1
== first_char2
) {
57 assert(rank_array
[suffix1
+ 1] < rank_array
[suffix2
+ 1]);
62 void ConstructAndVerify(std::string
const& str
) {
63 std::vector
<int> rank_array(str
.size() + 1);
64 std::vector
<int> suffix_array(str
.size() + 1);
65 SortSuffixesByPrefixDoubling(str
.begin(), str
.end(),
66 rank_array
.begin(), rank_array
.end(),
67 suffix_array
.begin(), suffix_array
.end());
68 Verify(str
, rank_array
, suffix_array
);
73 ConstructAndVerify(str
);
74 std::cout
<< "TestEmpty passed" << std::endl
;
78 void TestLengthOne() {
79 std::string
str("\0");
80 ConstructAndVerify(str
);
82 ConstructAndVerify(str
);
83 std::cout
<< "TestLengthOne passed" << std::endl
;
88 std::string
str("banana");
89 ConstructAndVerify(str
);
90 std::cout
<< "TestBanana passed" << std::endl
;
93 char const * const dna_512
=
94 "CACTGTGCCATCATCATCACCACCACTGTCATTATCACCACCACCATCATCACCAACACCACTG"
95 "CCATCGTCATCACCACCACTGTCATTATCACCACCACCATCACCAACATCACCACCACCATTAT"
96 "CACCACCATCAACACCACCACCCCCATCATCATCATCACTACTACCATCATTACCAGCACCACC"
97 "ACCACTATCACCACCACCACCACAATCACCATCACCACTATCATCAACATCATCACTACCACCA"
98 "TCACCAACACCACCATCATTATCACCACCACCACCATCACCAACATCACCACCATCATCATCAC"
99 "CACCATCACCAAGACCATCATCATCACCATCACCACCAACATCACCACCATCACCAACACCACC"
100 "ATCACCACCACCACCACCATCATCACCACCACCACCATCATCATCACCACCACCGCCATCATCA"
101 "TCGCCACCACCATGACCACCACCATCACAACCATCACCACCATCACAACCACCATCATCACTAT";
104 std::string
str(dna_512
);
105 ConstructAndVerify(str
);
106 std::cout
<< "TestDNA passed" << std::endl
;
109 char const * const aaa_256
=
110 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
111 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
112 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
113 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
116 std::string
str(aaa_256
);
117 std::vector
<int> rank_array(str
.size() + 1);
118 std::vector
<int> suffix_array(str
.size() + 1);
119 SortSuffixesByPrefixDoubling(str
.begin(), str
.end(),
120 rank_array
.begin(), rank_array
.end(),
121 suffix_array
.begin(), suffix_array
.end());
122 Verify(str
, rank_array
, suffix_array
);
123 std::vector
<int>::iterator it
=
124 std::adjacent_find(suffix_array
.begin(), suffix_array
.end(),
125 std::less_equal
<int>());
126 assert(suffix_array
.end() - it
== 0);
127 std::cout
<< "TestAAAA passed" << std::endl
;
131 std::string
str("AAAAAAAAB");
132 std::vector
<int> rank_array(str
.size() + 1);
133 std::vector
<int> suffix_array(str
.size() + 1);
134 SortSuffixesByPrefixDoubling(str
.begin(), str
.end(),
135 rank_array
.begin(), rank_array
.end(),
136 suffix_array
.begin(), suffix_array
.end());
137 Verify(str
, rank_array
, suffix_array
);
138 std::vector
<int>::iterator it
=
139 std::adjacent_find(suffix_array
.begin() + 1, suffix_array
.end(),
140 std::greater_equal
<int>());
141 assert(suffix_array
.end() - it
== 0);
142 std::cout
<< "TestAAAB passed" << std::endl
;
145 // Two strings with the same order of suffixes.
146 void TestSameOrder() {
147 std::string
str1("AAAAAAAAB");
148 std::vector
<int> rank_array(str1
.size() + 1);
149 std::vector
<int> suffix_array(str1
.size() + 1);
150 SortSuffixesByPrefixDoubling(str1
.begin(), str1
.end(),
151 rank_array
.begin(), rank_array
.end(),
152 suffix_array
.begin(), suffix_array
.end());
153 std::string
str2("XXXXXXXXY");
154 Verify(str2
, rank_array
, suffix_array
);
155 std::cout
<< "TestSameOrder passed" << std::endl
;
158 // Check that '\xFF' > 'A'
159 void TestAscii255() {
160 std::string
str1("AAAAAAAAB");
161 std::vector
<int> rank_array(str1
.size() + 1);
162 std::vector
<int> suffix_array(str1
.size() + 1);
163 SortSuffixesByPrefixDoubling(str1
.begin(), str1
.end(),
164 rank_array
.begin(), rank_array
.end(),
165 suffix_array
.begin(), suffix_array
.end());
166 std::string
str2("AAAAAAAA\xFF");
167 Verify(str2
, rank_array
, suffix_array
);
168 std::cout
<< "TestAscii255 passed" << std::endl
;
171 void TestComputeOneOrder() {
172 std::string
str(dna_512
);
173 std::vector
<int> rank_array(str
.size() + 1);
174 std::vector
<int> suffix_array(str
.size() + 1);
175 PrefixDoubler
<int> pd(str
.size());
176 pd
.ComputeOneOrder(str
.begin(), str
.end(),
177 rank_array
.begin(), rank_array
.end(),
178 suffix_array
.begin(), suffix_array
.end());
179 pd
.SortSuffixes(rank_array
.begin(), rank_array
.end(),
180 suffix_array
.begin(), suffix_array
.end(), 1);
181 pd
.ComputeFinalSuffixArray(rank_array
.begin(), rank_array
.end(),
182 suffix_array
.begin(), suffix_array
.end());
183 Verify(str
, rank_array
, suffix_array
);
184 std::cout
<< "TestComputeOneOrder passed" << std::endl
;
188 std::string
str(dna_512
);
189 std::vector
<int> rank_array(str
.size() + 1);
190 std::vector
<int> suffix_array(str
.size() + 1);
191 PrefixDoubler
<int> pd(str
.size());
192 pd
.ComputeOneOrder(str
.begin(), str
.end(),
193 rank_array
.begin(), rank_array
.end(),
194 suffix_array
.begin(), suffix_array
.end());
195 int order_depth
= pd
.Double(rank_array
.begin(), rank_array
.end(),
196 suffix_array
.begin(), suffix_array
.end(), 1);
197 pd
.SortSuffixes(rank_array
.begin(), rank_array
.end(),
198 suffix_array
.begin(), suffix_array
.end(), order_depth
);
199 pd
.ComputeFinalSuffixArray(rank_array
.begin(), rank_array
.end(),
200 suffix_array
.begin(), suffix_array
.end());
201 Verify(str
, rank_array
, suffix_array
);
202 std::cout
<< "TestDouble passed" << std::endl
;
205 void TestComputeInitialSuffixArray() {
206 std::string
str(dna_512
);
207 std::vector
<int> rank_array(str
.size() + 1);
208 std::vector
<int> suffix_array(str
.size() + 1);
209 PrefixDoubler
<int> pd(str
.size());
210 pd
.ComputeOneOrder(str
.begin(), str
.end(),
211 rank_array
.begin(), rank_array
.end(),
212 suffix_array
.begin(), suffix_array
.end());
213 pd
.ComputeInitialSuffixArray(rank_array
.begin(), rank_array
.end(),
214 suffix_array
.begin(), suffix_array
.end(), 1);
215 pd
.SortSuffixes(rank_array
.begin(), rank_array
.end(),
216 suffix_array
.begin(), suffix_array
.end(), 1);
217 pd
.ComputeFinalSuffixArray(rank_array
.begin(), rank_array
.end(),
218 suffix_array
.begin(), suffix_array
.end());
219 Verify(str
, rank_array
, suffix_array
);
220 std::cout
<< "TestComputeInitialSuffixArray passed" << std::endl
;
225 int main(int argc
, char **argv
) {
235 TestComputeOneOrder();
236 TestComputeInitialSuffixArray();