Version 6.4.0.0.beta1, tag libreoffice-6.4.0.0.beta1
[LibreOffice.git] / i18npool / qa / cppunit / test_textsearch.cxx
blob b2175b21bfa5457d5055e86b943141a829c40328
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <com/sun/star/util/SearchFlags.hpp>
21 #include <com/sun/star/util/SearchOptions.hpp>
22 #include <com/sun/star/util/SearchAlgorithms2.hpp>
23 #include <com/sun/star/util/XTextSearch2.hpp>
24 #include <unotest/bootstrapfixturebase.hxx>
25 #include <i18nutil/transliteration.hxx>
27 #include <unicode/regex.h>
29 using namespace ::com::sun::star;
30 typedef U_ICU_NAMESPACE::UnicodeString IcuUniString;
32 class TestTextSearch : public test::BootstrapFixtureBase
34 public:
35 virtual void setUp() override;
36 virtual void tearDown() override;
38 void testICU();
39 void testSearches();
40 void testWildcardSearch();
42 CPPUNIT_TEST_SUITE(TestTextSearch);
43 CPPUNIT_TEST(testICU);
44 CPPUNIT_TEST(testSearches);
45 CPPUNIT_TEST(testWildcardSearch);
46 CPPUNIT_TEST_SUITE_END();
47 private:
48 uno::Reference<util::XTextSearch> m_xSearch;
49 uno::Reference<util::XTextSearch2> m_xSearch2;
52 // Sanity check our ICU first ...
53 void TestTextSearch::testICU()
55 UErrorCode nErr = U_ZERO_ERROR;
56 sal_uInt32 nSearchFlags = UREGEX_UWORD | UREGEX_CASE_INSENSITIVE;
58 OUString aString( "abcdefgh" );
59 OUString aPattern( "e" );
60 IcuUniString aSearchPat( reinterpret_cast<const UChar*>(aPattern.getStr()), aPattern.getLength() );
62 std::unique_ptr<icu::RegexMatcher> pRegexMatcher(new icu::RegexMatcher( aSearchPat, nSearchFlags, nErr ));
64 IcuUniString aSource( reinterpret_cast<const UChar*>(aString.getStr()), aString.getLength() );
65 pRegexMatcher->reset( aSource );
67 CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) );
68 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
69 CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(4), pRegexMatcher->start( nErr ) );
70 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
71 CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(5), pRegexMatcher->end( nErr ) );
72 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
74 OUString aString2( "acababaabcababadcdaa" );
75 OUString aPattern2( "a" );
77 IcuUniString aSearchPat2( reinterpret_cast<const UChar*>(aPattern2.getStr()), aPattern2.getLength() );
78 pRegexMatcher.reset(new icu::RegexMatcher( aSearchPat2, nSearchFlags, nErr ));
80 IcuUniString aSource2( reinterpret_cast<const UChar*>(aString2.getStr()), aString2.getLength() );
81 pRegexMatcher->reset( aSource2 );
83 CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) );
84 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
85 CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(0), pRegexMatcher->start( nErr ) );
86 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
87 CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(1), pRegexMatcher->end( nErr ) );
88 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
91 void TestTextSearch::testSearches()
93 OUString str( "acababaabcababadcdaa" );
94 sal_Int32 startPos = 2, endPos = 20 ;
95 OUString const searchStr( "(ab)*a(c|d)+" );
96 sal_Int32 const fStartRes = 10, fEndRes = 18 ;
97 sal_Int32 const bStartRes = 18, bEndRes = 10 ;
99 // set options
100 util::SearchOptions aOptions;
101 aOptions.algorithmType = util::SearchAlgorithms_REGEXP ;
102 aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
103 aOptions.searchString = searchStr;
104 m_xSearch->setOptions( aOptions );
106 util::SearchResult aRes;
108 // search forward
109 aRes = m_xSearch->searchForward( str, startPos, endPos );
110 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
111 CPPUNIT_ASSERT_EQUAL( fStartRes, aRes.startOffset[0] );
112 CPPUNIT_ASSERT_EQUAL( fEndRes, aRes.endOffset[0] );
114 // search backwards
115 aRes = m_xSearch->searchBackward( str, endPos, startPos );
116 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
117 CPPUNIT_ASSERT_EQUAL( bStartRes, aRes.startOffset[0] );
118 CPPUNIT_ASSERT_EQUAL( bEndRes, aRes.endOffset[0] );
120 aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE
121 | TransliterationFlags::IGNORE_WIDTH);
122 aOptions.searchString = "([^ ]*)[ ]*([^ ]*)";
123 m_xSearch->setOptions(aOptions);
124 aRes = m_xSearch->searchForward("11 22 33", 2, 7);
125 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), aRes.subRegExpressions);
126 CPPUNIT_ASSERT((aRes.startOffset[0] == 2) && (aRes.endOffset[0] == 5));
127 CPPUNIT_ASSERT((aRes.startOffset[1] == 2) && (aRes.endOffset[1] == 2));
128 CPPUNIT_ASSERT((aRes.startOffset[2] == 3) && (aRes.endOffset[2] == 5));
131 void TestTextSearch::testWildcardSearch()
133 util::SearchOptions2 aOptions;
134 OUString aText;
135 util::SearchResult aRes;
137 aOptions.AlgorithmType2 = util::SearchAlgorithms2::WILDCARD ;
138 aOptions.WildcardEscapeCharacter = '~';
139 // aOptions.searchFlag = ::css::util::SearchFlags::WILD_MATCH_SELECTION;
140 // is not set, so substring match is allowed.
141 aOptions.transliterateFlags = sal_Int32(::css::i18n::TransliterationModules::TransliterationModules_IGNORE_CASE);
142 aText = "abAca";
144 aOptions.searchString = "a";
145 m_xSearch2->setOptions2( aOptions );
146 // match first "a", [0,1)
147 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
148 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
149 CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 1));
150 // match last "a", (5,4]
151 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
152 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
153 CPPUNIT_ASSERT((aRes.startOffset[0] == 5) && (aRes.endOffset[0] == 4));
155 aOptions.searchString = "a?";
156 m_xSearch2->setOptions2( aOptions );
157 // match "ab", [0,2)
158 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
159 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
160 CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 2));
161 // match "ac", (4,2]
162 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
163 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
164 CPPUNIT_ASSERT((aRes.startOffset[0] == 4) && (aRes.endOffset[0] == 2));
166 aOptions.searchString = "a*c";
167 m_xSearch2->setOptions2( aOptions );
168 // match "abac", [0,4) XXX NOTE: first match forward
169 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
170 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
171 CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 4));
172 // match "ac", (4,2] XXX NOTE: first match backward, not greedy
173 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
174 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
175 CPPUNIT_ASSERT((aRes.startOffset[0] == 4) && (aRes.endOffset[0] == 2));
177 aOptions.searchString = "b*a";
178 m_xSearch2->setOptions2( aOptions );
179 // match "ba", [1,3) XXX NOTE: first match forward, not greedy
180 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
181 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
182 CPPUNIT_ASSERT((aRes.startOffset[0] == 1) && (aRes.endOffset[0] == 3));
183 // match "baca", (5,1] XXX NOTE: first match backward
184 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
185 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
186 CPPUNIT_ASSERT((aRes.startOffset[0] == 5) && (aRes.endOffset[0] == 1));
188 aText = "ab?ca";
190 aOptions.searchString = "?~??";
191 m_xSearch2->setOptions2( aOptions );
192 // match "b?c", [1,4)
193 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
194 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
195 CPPUNIT_ASSERT((aRes.startOffset[0] == 1) && (aRes.endOffset[0] == 4));
196 // match "b?c", (4,1]
197 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
198 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
199 CPPUNIT_ASSERT((aRes.startOffset[0] == 4) && (aRes.endOffset[0] == 1));
201 aText = "ab*ca";
203 aOptions.searchString = "?~*?";
204 m_xSearch2->setOptions2( aOptions );
205 // match "b?c", [1,4)
206 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
207 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
208 CPPUNIT_ASSERT((aRes.startOffset[0] == 1) && (aRes.endOffset[0] == 4));
209 // match "b?c", (4,1]
210 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
211 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
212 CPPUNIT_ASSERT((aRes.startOffset[0] == 4) && (aRes.endOffset[0] == 1));
214 aOptions.searchString = "ca?";
215 m_xSearch2->setOptions2( aOptions );
216 // no match
217 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
218 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
219 // no match
220 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
221 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
223 aOptions.searchString = "ca*";
224 m_xSearch2->setOptions2( aOptions );
225 // match "ca", [3,5)
226 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
227 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
228 CPPUNIT_ASSERT((aRes.startOffset[0] == 3) && (aRes.endOffset[0] == 5));
229 // match "ca", (5,3]
230 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
231 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
232 CPPUNIT_ASSERT((aRes.startOffset[0] == 5) && (aRes.endOffset[0] == 3));
234 aOptions.searchString = "*ca*";
235 m_xSearch2->setOptions2( aOptions );
236 // match "abaca", [0,5)
237 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
238 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
239 CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 5));
240 // match "abaca", (5,0]
241 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
242 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
243 CPPUNIT_ASSERT((aRes.startOffset[0] == 5) && (aRes.endOffset[0] == 0));
245 aText = "123123";
246 aOptions.searchString = "*2?";
247 m_xSearch2->setOptions2( aOptions );
248 // match first "123", [0,3)
249 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
250 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
251 CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 3));
252 // match "123123", (6,0] Yes this looks odd, but it is as searching "?2*" forward.
253 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
254 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
255 CPPUNIT_ASSERT((aRes.startOffset[0] == 6) && (aRes.endOffset[0] == 0));
257 aOptions.searchFlag |= util::SearchFlags::WILD_MATCH_SELECTION;
258 m_xSearch2->setOptions2( aOptions );
259 // match "123123", [0,6) with greedy '*'
260 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
261 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
262 CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 6));
263 // match "123123", (6,0]
264 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
265 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
266 CPPUNIT_ASSERT((aRes.startOffset[0] == 6) && (aRes.endOffset[0] == 0));
269 void TestTextSearch::setUp()
271 BootstrapFixtureBase::setUp();
272 m_xSearch.set(m_xSFactory->createInstance("com.sun.star.util.TextSearch"), uno::UNO_QUERY_THROW);
273 m_xSearch2.set(m_xSFactory->createInstance("com.sun.star.util.TextSearch2"), uno::UNO_QUERY_THROW);
276 void TestTextSearch::tearDown()
278 m_xSearch.clear();
279 m_xSearch2.clear();
280 BootstrapFixtureBase::tearDown();
283 CPPUNIT_TEST_SUITE_REGISTRATION(TestTextSearch);
285 CPPUNIT_PLUGIN_IMPLEMENT();
287 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */