/** @file
 * @brief Test the stemming API
 */
/* Copyright (C) 2010,2012,2019 Olly Betts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
28 #include "testsuite.h"
29 #include "testutils.h"
// User-defined stemmer that the test cases in this file pass to Xapian::Stem.
// NOTE(review): the embedded line numbers skip (34 -> 37, 40 -> 45), so part of
// operator() and the whole body of get_description() are not visible in this
// view - confirm any claim about this class against the full file.
33 class MyStemImpl
: public Xapian::StemImplementation
{
// Stem `word`. The return statement visible here yields at most the first
// three chars of `word` (substr(0, 3)); other return paths may exist in the
// lines that are not visible.
34 string
operator()(const string
& word
) {
37 return word
.substr(0, 3);
// Return a description of this stemmer (body not visible in this view).
40 string
get_description() const {
45 /// Test user stemming algorithms.
// Wraps a MyStemImpl in a Xapian::Stem, then checks the wrapper's description
// and the result of stemming three sample words.
46 DEFINE_TESTCASE(stem1
, !backend
) {
47 Xapian::Stem
st(new MyStemImpl
);
// Expected description of the wrapped stemmer. Presumably the inner "MyStem()"
// comes from MyStemImpl::get_description() - its body is not visible here.
49 TEST_EQUAL(st
.get_description(), "Xapian::Stem(MyStem())");
// Inputs of up to three characters are expected back unchanged; "food" is
// expected to be cut down to "foo".
50 TEST_EQUAL(st("a"), "a");
51 TEST_EQUAL(st("foo"), "foo");
52 TEST_EQUAL(st("food"), "foo");
55 /// New feature in 1.0.21/1.2.1 - "nb" and "nn" select the Norwegian stemmer.
// Compares stemmers by their get_description() strings: the stemmer built from
// "norwegian" is the reference that every other name is checked against.
56 DEFINE_TESTCASE(stem2
, !backend
) {
57 Xapian::Stem
st_norwegian("norwegian");
// "nb", "nn" and "no" must all describe themselves the same as "norwegian".
58 TEST_EQUAL(st_norwegian
.get_description(),
59 Xapian::Stem("nb").get_description());
60 TEST_EQUAL(st_norwegian
.get_description(),
61 Xapian::Stem("nn").get_description());
62 TEST_EQUAL(st_norwegian
.get_description(),
63 Xapian::Stem("no").get_description());
// Negative checks: "en" and "none" must NOT describe themselves as Norwegian.
// "none" is included so that a name merely starting with "no" is not matched.
64 TEST_NOT_EQUAL(st_norwegian
.get_description(),
65 Xapian::Stem("en").get_description());
66 TEST_NOT_EQUAL(st_norwegian
.get_description(),
67 Xapian::Stem("none").get_description());
70 /// Test the "earlyenglish" stemmer.
// Builds a stemmer by the name "earlyenglish" and checks four sample words
// against their expected stems.
71 DEFINE_TESTCASE(stem3
, !backend
) {
72 Xapian::Stem
earlyenglish("earlyenglish");
// "-ed" and "-ing" forms are expected to reduce to "love".
73 TEST_EQUAL(earlyenglish("loved"), "love");
74 TEST_EQUAL(earlyenglish("loving"), "love");
// Archaic "-eth" and "-est" verb endings are expected to be removed as well.
75 TEST_EQUAL(earlyenglish("loveth"), "love");
76 TEST_EQUAL(earlyenglish("givest"), "give");
79 /// Test handling of a stemmer returning an empty string.
80 // Regression test for https://trac.xapian.org/ticket/741 fixed in 1.4.2.
// Indexes a four-word text with a TermGenerator and then walks the term list
// of `doc`, checking the terms that were generated.
// NOTE(review): the embedded line numbers skip (84 -> 88, 92 -> 94), so the
// declaration of `doc`, the calls that connect `tg` to `doc` and `st`, and the
// check on the first term are not visible in this view - confirm against the
// full file.
81 DEFINE_TESTCASE(stemempty1
, !backend
) {
82 Xapian::Stem
st(new MyStemImpl
);
84 Xapian::TermGenerator tg
;
// Select the STEM_ALL strategy on the term generator.
88 tg
.set_stemming_strategy(tg
.STEM_ALL
);
// Four words go in, but only three term list entries are expected below.
// Presumably the stemmer returns an empty string for one of the words (that
// code path is not visible here) and no term is generated for it.
90 tg
.index_text("watch me vanish now");
91 auto i
= doc
.termlist_begin();
// First entry must exist (its value check is not visible in this view).
92 TEST(i
!= doc
.termlist_end());
// Second entry must exist and be "now".
94 TEST(++i
!= doc
.termlist_end());
95 TEST_EQUAL(*i
, "now");
// Third entry must exist and be "wat", i.e. "watch" cut to three characters.
96 TEST(++i
!= doc
.termlist_end());
97 TEST_EQUAL(*i
, "wat");
// There must be no fourth entry.
98 TEST(++i
== doc
.termlist_end());
101 /// Test invalid language names with various characters in.
// Checks that constructing a Xapian::Stem from an unknown language name throws
// Xapian::InvalidArgumentError, and that passing `true` as the second
// (fallback) argument gives a "none" stemmer instead.
// NOTE(review): the embedded line numbers skip (104 -> 106, 106 -> 109), so the
// statement that uses `ch` and the end of the loop are not visible in this
// view - presumably `ch` is written into `lang` on each pass; confirm against
// the full file.
102 DEFINE_TESTCASE(stemlangs2
, !backend
) {
103 string
lang("xdummy");
// `ch` runs over every value from 0 to 255 inclusive.
104 for (unsigned ch
= 0; ch
<= 255; ++ch
) {
// Constructing a stemmer from `lang` with no second argument must throw.
106 TEST_EXCEPTION(Xapian::InvalidArgumentError
, Xapian::Stem
stem(lang
));
109 // Test fallback=false throws too.
110 TEST_EXCEPTION(Xapian::InvalidArgumentError
, Xapian::Stem
stem("x", false));
112 // Test fallback=true gives "none" stemmer.
113 Xapian::Stem
stem(lang
, true);
// The fallback stemmer must report is_none() and describe itself as "none".
114 TEST(stem
.is_none());
115 TEST_EQUAL(stem
.get_description(), "Xapian::Stem(none)");