1 //===- unittests/Support/SuffixTreeTest.cpp - suffix tree tests -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Support/SuffixTree.h"
10 #include "gtest/gtest.h"
17 // Each example vector has a unique element at the end to represent the end of
20 // Tests that The SuffixTree finds a simple repetition of the substring {1, 2}
21 // {1, 2} twice in the provided string.
22 TEST(SuffixTreeTest
, TestSingleRepetition
) {
23 std::vector
<unsigned> SimpleRepetitionData
= {1, 2, 1, 2, 3};
24 SuffixTree
ST(SimpleRepetitionData
);
25 std::vector
<SuffixTree::RepeatedSubstring
> SubStrings
;
26 for (auto It
= ST
.begin(); It
!= ST
.end(); It
++)
27 SubStrings
.push_back(*It
);
28 ASSERT_EQ(SubStrings
.size(), 1u);
29 EXPECT_EQ(SubStrings
[0].Length
, 2u);
30 EXPECT_EQ(SubStrings
[0].StartIndices
.size(), 2u);
31 for (unsigned StartIdx
: SubStrings
[0].StartIndices
) {
32 EXPECT_EQ(SimpleRepetitionData
[StartIdx
], 1u);
33 EXPECT_EQ(SimpleRepetitionData
[StartIdx
+ 1], 2u);
37 // Tests that the SuffixTree is able to find the substrings {1, 2, 3} at
38 // at indices 0 and 3 as well as the substrings {2, 3} at indices 1 and 4.
39 // This test also serves as a flag for improvements to the suffix tree.
41 // FIXME: Right now, the longest repeated substring from a specific index is
42 // returned, this could be improved to return the longest repeated substring, as
43 // well as those that are smaller.
44 TEST(SuffixTreeTest
, TestLongerRepetition
) {
45 std::vector
<unsigned> RepeatedRepetitionData
= {1, 2, 3, 1, 2, 3, 4};
46 SuffixTree
ST(RepeatedRepetitionData
);
47 std::vector
<SuffixTree::RepeatedSubstring
> SubStrings
;
48 for (auto It
= ST
.begin(); It
!= ST
.end(); It
++)
49 SubStrings
.push_back(*It
);
50 EXPECT_EQ(SubStrings
.size(), 2u);
52 for (SuffixTree::RepeatedSubstring
&RS
: SubStrings
) {
54 bool IsExpectedLen
= (Len
== 3u || Len
== 2u);
56 ASSERT_TRUE(IsExpectedLen
);
59 for (unsigned StartIdx
: RS
.StartIndices
) {
60 IsExpectedIndex
= (StartIdx
== 0u || StartIdx
== 3u);
61 EXPECT_TRUE(IsExpectedIndex
);
62 EXPECT_EQ(RepeatedRepetitionData
[StartIdx
], 1u);
63 EXPECT_EQ(RepeatedRepetitionData
[StartIdx
+ 1], 2u);
64 EXPECT_EQ(RepeatedRepetitionData
[StartIdx
+ 2], 3u);
67 for (unsigned StartIdx
: RS
.StartIndices
) {
68 IsExpectedIndex
= (StartIdx
== 1u || StartIdx
== 4u);
69 EXPECT_TRUE(IsExpectedIndex
);
70 EXPECT_EQ(RepeatedRepetitionData
[StartIdx
], 2u);
71 EXPECT_EQ(RepeatedRepetitionData
[StartIdx
+ 1], 3u);
77 // Tests that the SuffixTree is able to find substring {1, 1, 1, 1, 1} at
80 // FIXME: Add support for detecting {1, 1} and {1, 1, 1}
81 TEST(SuffixTreeTest
, TestSingleCharacterRepeat
) {
82 std::vector
<unsigned> RepeatedRepetitionData
= {1, 1, 1, 1, 1, 1, 2};
83 std::vector
<unsigned>::iterator RRDIt
, RRDIt2
;
84 SuffixTree
ST(RepeatedRepetitionData
);
85 std::vector
<SuffixTree::RepeatedSubstring
> SubStrings
;
86 for (auto It
= ST
.begin(); It
!= ST
.end(); It
++)
87 SubStrings
.push_back(*It
);
88 EXPECT_EQ(SubStrings
.size(), 1u);
89 for (SuffixTree::RepeatedSubstring
&RS
: SubStrings
) {
90 EXPECT_EQ(RS
.StartIndices
.size(),
91 RepeatedRepetitionData
.size() - RS
.Length
);
92 for (unsigned StartIdx
: SubStrings
[0].StartIndices
) {
93 RRDIt
= RRDIt2
= RepeatedRepetitionData
.begin();
94 std::advance(RRDIt
, StartIdx
);
95 std::advance(RRDIt2
, StartIdx
+ SubStrings
[0].Length
);
97 all_of(make_range
<std::vector
<unsigned>::iterator
>(RRDIt
, RRDIt2
),
98 [](unsigned Elt
) { return Elt
== 1; }));
103 // The suffix tree cannot currently find repeated substrings in strings of the
104 // form {1, 2, 3, 1, 2, 3}, because the two {1, 2, 3}s are adjacent ("tandem
107 // FIXME: Teach the SuffixTree to recognize these cases.
108 TEST(SuffixTreeTest
, TestTandemRepeat
) {
109 std::vector
<unsigned> RepeatedRepetitionData
= {1, 2, 3, 1, 2, 3};
110 SuffixTree
ST(RepeatedRepetitionData
);
111 std::vector
<SuffixTree::RepeatedSubstring
> SubStrings
;
112 for (auto It
= ST
.begin(); It
!= ST
.end(); It
++)
113 SubStrings
.push_back(*It
);
114 EXPECT_EQ(SubStrings
.size(), 0u);
117 // Tests that the SuffixTree does not erroneously include values that are not
118 // in repeated substrings. That is, only finds {1, 1} at indices 0 and 3 and
119 // does not include 2 and 3.
120 TEST(SuffixTreeTest
, TestExclusion
) {
121 std::vector
<unsigned> RepeatedRepetitionData
= {1, 1, 2, 1, 1, 3};
122 std::vector
<unsigned>::iterator RRDIt
, RRDIt2
;
123 SuffixTree
ST(RepeatedRepetitionData
);
124 std::vector
<SuffixTree::RepeatedSubstring
> SubStrings
;
125 for (auto It
= ST
.begin(); It
!= ST
.end(); It
++)
126 SubStrings
.push_back(*It
);
127 EXPECT_EQ(SubStrings
.size(), 1u);
128 bool IsExpectedIndex
;
129 for (SuffixTree::RepeatedSubstring
&RS
: SubStrings
) {
130 for (unsigned StartIdx
: RS
.StartIndices
) {
131 IsExpectedIndex
= (StartIdx
== 0u || StartIdx
== 3u);
132 EXPECT_TRUE(IsExpectedIndex
);
133 RRDIt
= RRDIt2
= RepeatedRepetitionData
.begin();
134 std::advance(RRDIt
, StartIdx
);
135 std::advance(RRDIt2
, StartIdx
+ RS
.Length
);
137 all_of(make_range
<std::vector
<unsigned>::iterator
>(RRDIt
, RRDIt2
),
138 [](unsigned Elt
) { return Elt
== 1; }));