1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/third_party/qsufsort.h"
12 #include "base/macros.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "testing/gtest/include/gtest/gtest.h"
// Runs courgette::qsuf::qsufsort() over several sample strings and checks that
// the resulting array I (a) is a permutation of [0, len], (b) starts with the
// empty suffix, and (c) lists the suffixes in strictly increasing
// lexicographical order.
16 TEST(QSufSortTest
, Sort
) {
// Sample inputs: an English sentence, a periodically repeating word, a run of
// one repeated character, strings of binary and decimal digits, and bytes
// with the high bit set.
17 const char* test_cases
[] = {
24 "The quick brown fox jumps over the lazy dog.",
25 "elephantelephantelephantelephantelephant",
26 "-------------------------",
27 "011010011001011010010110011010010",
28 "3141592653589793238462643383279502884197169399375105",
29 "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD",
32 for (size_t idx
= 0; idx
< arraysize(test_cases
); ++idx
) {
33 int len
= static_cast<int>(::strlen(test_cases
[idx
]));
// View the input as unsigned bytes; this is the pointer handed to qsufsort()
// and used for the suffix comparisons below.
34 const unsigned char* s
=
35 reinterpret_cast<const unsigned char*>(test_cases
[idx
]);
37 // Generate the suffix array as I.
// I and V each hold len + 1 entries (offsets 0..len, where offset len is the
// empty suffix). Only I is inspected afterwards.
38 std::vector
<int> I(len
+ 1);
39 std::vector
<int> V(len
+ 1);
40 courgette::qsuf::qsufsort
<int*>(&I
[0], &V
[0], s
, len
);
42 // Expect that I[] is a permutation of [0, len].
// Sort a copy so that I itself stays intact for the ordering checks below.
43 std::vector
<int> I_sorted(I
);
44 std::sort(I_sorted
.begin(), I_sorted
.end());
45 for (int i
= 0; i
< len
+ 1; ++i
) {
46 EXPECT_EQ(i
, I_sorted
[i
]) << "test_case[" << idx
<< "]";
49 // First string must be empty string.
// The suffix starting at offset len is the empty one, so I[0] must be len.
50 EXPECT_EQ(len
, I
[0]) << "test_case[" << idx
<< "]";
52 // Expect that the |len + 1| suffixes are strictly ordered.
53 const unsigned char* end
= s
+ len
;
// Compare each pair of adjacent entries in I.
54 for (int i
= 0; i
< len
; ++i
) {
55 const unsigned char* suf1
= s
+ I
[i
];
56 const unsigned char* suf2
= s
+ I
[i
+ 1];
// Both ranges stop at |end|, so the two suffixes are compared in full,
// byte by byte as unsigned values.
57 bool is_less
= std::lexicographical_compare(suf1
, end
, suf2
, end
);
58 EXPECT_TRUE(is_less
) << "test_case[" << idx
<< "]";
// Builds a suffix array for a fixed sentence with courgette::qsuf::qsufsort()
// and then checks courgette::qsuf::search() against a table of query strings,
// comparing the reported match position and match length with expected values.
63 TEST(QSufSortTest
, Search
) {
64 // Initialize main string and the suffix array.
65 // Positions: 00000000001111111111122222222233333333334444
66 // 01234567890123456789012345678901234567890123
67 const char* old_str
= "the quick brown fox jumps over the lazy dog.";
68 int old_size
= static_cast<int>(::strlen(old_str
));
69 const unsigned char* old_buf
=
70 reinterpret_cast<const unsigned char*>(old_str
);
// I and V each hold old_size + 1 entries; only I is passed to search() below.
71 std::vector
<int> I(old_size
+ 1);
72 std::vector
<int> V(old_size
+ 1);
73 courgette::qsuf::qsufsort
<int*>(&I
[0], &V
[0], old_buf
, old_size
);
// Expected value of |pos| as reported by search(); only compared when >= 0.
77 int exp_pos
; // -1 means "don't care".
// Query string that is searched for in old_str.
79 const char* query_str
;
// NOTE(review): entries appear to be {exp_pos, exp_match_len, query_str}; the
// declaration of exp_match_len is not visible here -- confirm the field order.
82 {0, 44, "the quick brown fox jumps over the lazy dog."},
84 {-1, 0, ""}, // Current algorithm does not enforce |pos| == 0.
85 // Exact and unique suffix match.
87 {31, 13, "the lazy dog."},
88 // Exact and unique non-suffix match.
90 {0, 9, "the quick"}, // Unique prefix.
91 // Entire word match with multiple results: take lexicographical first.
92 {31, 3, "the"}, // Non-unique prefix: "the l"... < "the q"...
93 {9, 1, " "}, // " brown"... wins.
94 // Partial and unique match of query prefix.
95 {16, 10, "fox jumps with the hosps"},
96 // Partial and multiple match of query prefix: no guarantees on |pos|.
97 // Take lexicographical first for matching portion *only*, so same results:
98 {-1, 4, "the apple"}, // query < "the l"... < "the q"...
99 {-1, 4, "the opera"}, // "the l"... < query < "the q"...
100 {-1, 4, "the zebra"}, // "the l"... < "the q"... < query
101 // Prefix match dominates suffix match.
102 {26, 5, "over quick brown fox"},
106 {-1, 0, "THE QUICK BROWN FOX"},
110 for (size_t idx
= 0; idx
< arraysize(test_cases
); ++idx
) {
111 const auto& test_case
= test_cases
[idx
];
112 int new_size
= static_cast<int>(::strlen(test_case
.query_str
));
113 const unsigned char* new_buf
=
114 reinterpret_cast<const unsigned char*>(test_case
.query_str
);
116 // Perform the search.
// search() returns the match length and reports the match position
// through the |pos| out-parameter.
118 int match_len
= courgette::qsuf::search(
119 &I
[0], old_buf
, old_size
, new_buf
, new_size
, &pos
);
121 // Check basic properties and match with expected values.
// The match length must lie in [0, new_size].
122 EXPECT_GE(match_len
, 0) << "test_case[" << idx
<< "]";
123 EXPECT_LE(match_len
, new_size
) << "test_case[" << idx
<< "]";
// The matched range [pos, pos + match_len) must lie inside old_buf.
125 EXPECT_GE(pos
, 0) << "test_case[" << idx
<< "]";
126 EXPECT_LE(pos
, old_size
- match_len
) << "test_case[" << idx
<< "]";
// The matched bytes of old_buf must equal the first match_len bytes of the
// query.
127 EXPECT_EQ(0, ::memcmp(old_buf
+ pos
, new_buf
, match_len
))
128 << "test_case[" << idx
<< "]";
// A negative exp_pos means the position is not compared for this case.
130 if (test_case
.exp_pos
>= 0) {
131 EXPECT_EQ(test_case
.exp_pos
, pos
) << "test_case[" << idx
<< "]";
// Compare the match length against the expected value.
133 EXPECT_EQ(test_case
.exp_match_len
, match_len
) << "test_case[" << idx
<< "]";