tdf#163083: try to release lock to avoid deadlock
[LibreOffice.git] / i18npool / qa / cppunit / test_textsearch.cxx
blob38cc099b7c95d935c61d94846cd06d484296e0fd
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <com/sun/star/util/SearchFlags.hpp>
21 #include <com/sun/star/util/SearchOptions.hpp>
22 #include <com/sun/star/util/SearchAlgorithms2.hpp>
23 #include <com/sun/star/util/XTextSearch2.hpp>
24 #include <unotest/bootstrapfixturebase.hxx>
25 #include <i18nutil/transliteration.hxx>
27 #include <unicode/regex.h>
29 using namespace ::com::sun::star;
31 class TestTextSearch : public test::BootstrapFixtureBase
33 public:
34 virtual void setUp() override;
35 virtual void tearDown() override;
37 void testICU();
38 void testSearches();
39 void testWildcardSearch();
40 void testApostropheSearch();
41 void testQuotationMarkSearch();
42 void testTdf138410();
44 CPPUNIT_TEST_SUITE(TestTextSearch);
45 CPPUNIT_TEST(testICU);
46 CPPUNIT_TEST(testSearches);
47 CPPUNIT_TEST(testWildcardSearch);
48 CPPUNIT_TEST(testApostropheSearch);
49 CPPUNIT_TEST(testQuotationMarkSearch);
50 CPPUNIT_TEST(testTdf138410);
51 CPPUNIT_TEST_SUITE_END();
52 private:
53 uno::Reference<util::XTextSearch> m_xSearch;
54 uno::Reference<util::XTextSearch2> m_xSearch2;
57 // Sanity check our ICU first ...
58 void TestTextSearch::testICU()
60 UErrorCode nErr = U_ZERO_ERROR;
61 sal_uInt32 nSearchFlags = UREGEX_UWORD | UREGEX_CASE_INSENSITIVE;
63 OUString aString( u"abcdefgh"_ustr );
64 OUString aPattern( u"e"_ustr );
65 icu::UnicodeString aSearchPat( reinterpret_cast<const UChar*>(aPattern.getStr()), aPattern.getLength() );
67 std::unique_ptr<icu::RegexMatcher> pRegexMatcher(new icu::RegexMatcher( aSearchPat, nSearchFlags, nErr ));
69 icu::UnicodeString aSource( reinterpret_cast<const UChar*>(aString.getStr()), aString.getLength() );
70 pRegexMatcher->reset( aSource );
72 CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) );
73 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
74 CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(4), pRegexMatcher->start( nErr ) );
75 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
76 CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(5), pRegexMatcher->end( nErr ) );
77 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
79 OUString aString2( u"acababaabcababadcdaa"_ustr );
80 OUString aPattern2( u"a"_ustr );
82 icu::UnicodeString aSearchPat2( reinterpret_cast<const UChar*>(aPattern2.getStr()), aPattern2.getLength() );
83 pRegexMatcher.reset(new icu::RegexMatcher( aSearchPat2, nSearchFlags, nErr ));
85 icu::UnicodeString aSource2( reinterpret_cast<const UChar*>(aString2.getStr()), aString2.getLength() );
86 pRegexMatcher->reset( aSource2 );
88 CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) );
89 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
90 CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(0), pRegexMatcher->start( nErr ) );
91 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
92 CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(1), pRegexMatcher->end( nErr ) );
93 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
96 void TestTextSearch::testSearches()
98 OUString str( u"acababaabcababadcdaa"_ustr );
99 sal_Int32 startPos = 2, endPos = 20 ;
100 sal_Int32 const fStartRes = 10, fEndRes = 18 ;
101 sal_Int32 const bStartRes = 18, bEndRes = 10 ;
103 // set options
104 util::SearchOptions aOptions;
105 aOptions.algorithmType = util::SearchAlgorithms_REGEXP ;
106 aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
107 aOptions.searchString = "(ab)*a(c|d)+";
108 m_xSearch->setOptions( aOptions );
110 util::SearchResult aRes;
112 // search forward
113 aRes = m_xSearch->searchForward( str, startPos, endPos );
114 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
115 CPPUNIT_ASSERT_EQUAL( fStartRes, aRes.startOffset[0] );
116 CPPUNIT_ASSERT_EQUAL( fEndRes, aRes.endOffset[0] );
118 // search backwards
119 aRes = m_xSearch->searchBackward( str, endPos, startPos );
120 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
121 CPPUNIT_ASSERT_EQUAL( bStartRes, aRes.startOffset[0] );
122 CPPUNIT_ASSERT_EQUAL( bEndRes, aRes.endOffset[0] );
124 aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE
125 | TransliterationFlags::IGNORE_WIDTH);
126 aOptions.searchString = "([^ ]*)[ ]*([^ ]*)";
127 m_xSearch->setOptions(aOptions);
128 aRes = m_xSearch->searchForward(u"11 22 33"_ustr, 2, 7);
129 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), aRes.subRegExpressions);
130 CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.startOffset[0]);
131 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[0]);
132 CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.startOffset[1]);
133 CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[1]);
134 CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.startOffset[2]);
135 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[2]);
138 void TestTextSearch::testWildcardSearch()
140 util::SearchOptions2 aOptions;
141 OUString aText;
142 util::SearchResult aRes;
144 aOptions.AlgorithmType2 = util::SearchAlgorithms2::WILDCARD ;
145 aOptions.WildcardEscapeCharacter = '~';
146 // aOptions.searchFlag = ::css::util::SearchFlags::WILD_MATCH_SELECTION;
147 // is not set, so substring match is allowed.
148 aOptions.transliterateFlags = sal_Int32(::css::i18n::TransliterationModules::TransliterationModules_IGNORE_CASE);
149 aText = "abAca";
151 aOptions.searchString = "a";
152 m_xSearch2->setOptions2( aOptions );
153 // match first "a", [0,1)
154 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
155 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
156 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
157 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
158 // match last "a", (5,4]
159 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
160 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
161 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
162 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
164 aOptions.searchString = "a?";
165 m_xSearch2->setOptions2( aOptions );
166 // match "ab", [0,2)
167 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
168 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
169 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
170 CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[0]);
171 // match "ac", (4,2]
172 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
173 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
174 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
175 CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[0]);
177 aOptions.searchString = "a*c";
178 m_xSearch2->setOptions2( aOptions );
179 // match "abac", [0,4) XXX NOTE: first match forward
180 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
181 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
182 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
183 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
184 // match "ac", (4,2] XXX NOTE: first match backward, not greedy
185 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
186 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
187 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
188 CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[0]);
190 aOptions.searchString = "b*a";
191 m_xSearch2->setOptions2( aOptions );
192 // match "ba", [1,3) XXX NOTE: first match forward, not greedy
193 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
194 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
195 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.startOffset[0]);
196 CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.endOffset[0]);
197 // match "baca", (5,1] XXX NOTE: first match backward
198 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
199 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
200 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
201 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
203 aText = "ab?ca";
205 aOptions.searchString = "?~??";
206 m_xSearch2->setOptions2( aOptions );
207 // match "b?c", [1,4)
208 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
209 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
210 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.startOffset[0]);
211 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
212 // match "b?c", (4,1]
213 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
214 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
215 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
216 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
218 aText = "ab*ca";
220 aOptions.searchString = "?~*?";
221 m_xSearch2->setOptions2( aOptions );
222 // match "b?c", [1,4)
223 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
224 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
225 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.startOffset[0]);
226 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
227 // match "b?c", (4,1]
228 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
229 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
230 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
231 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
233 aOptions.searchString = "ca?";
234 m_xSearch2->setOptions2( aOptions );
235 // no match
236 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
237 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
238 // no match
239 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
240 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
242 aOptions.searchString = "ca*";
243 m_xSearch2->setOptions2( aOptions );
244 // match "ca", [3,5)
245 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
246 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
247 CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.startOffset[0]);
248 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[0]);
249 // match "ca", (5,3]
250 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
251 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
252 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
253 CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.endOffset[0]);
255 aOptions.searchString = "*ca*";
256 m_xSearch2->setOptions2( aOptions );
257 // match "abaca", [0,5)
258 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
259 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
260 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
261 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[0]);
262 // match "abaca", (5,0]
263 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
264 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
265 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
266 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.endOffset[0]);
268 aText = "123123";
269 aOptions.searchString = "*2?";
270 m_xSearch2->setOptions2( aOptions );
271 // match first "123", [0,3)
272 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
273 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
274 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
275 CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.endOffset[0]);
276 // match "123123", (6,0] Yes this looks odd, but it is as searching "?2*" forward.
277 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
278 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
279 CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes.startOffset[0]);
280 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.endOffset[0]);
282 aOptions.searchFlag |= util::SearchFlags::WILD_MATCH_SELECTION;
283 m_xSearch2->setOptions2( aOptions );
284 // match "123123", [0,6) with greedy '*'
285 aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
286 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
287 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
288 CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes.endOffset[0]);
289 // match "123123", (6,0]
290 aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
291 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
292 CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes.startOffset[0]);
293 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.endOffset[0]);
296 void TestTextSearch::testApostropheSearch()
298 // A) find typographic apostrophes also by using ASCII apostrophe in searchString
299 OUString str( u"It\u2019s an apostrophe."_ustr );
300 sal_Int32 startPos = 0, endPos = str.getLength();
302 // set options
303 util::SearchOptions aOptions;
304 aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
305 aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
306 aOptions.searchString = "'";
307 m_xSearch->setOptions( aOptions );
309 util::SearchResult aRes;
311 // search forward
312 aRes = m_xSearch->searchForward( str, startPos, endPos );
313 // This was 0.
314 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
315 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
316 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
318 // search backwards
319 aRes = m_xSearch->searchBackward( str, endPos, startPos );
320 // This was 0.
321 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
322 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
323 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
325 // check with transliteration
326 aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE
327 | TransliterationFlags::IGNORE_WIDTH);
328 m_xSearch->setOptions(aOptions);
330 // search forward
331 aRes = m_xSearch->searchForward( str, startPos, endPos );
332 // This was 0.
333 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
334 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
335 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
337 // search backwards
338 aRes = m_xSearch->searchBackward( str, endPos, startPos );
339 // This was 0.
340 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
341 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
342 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
344 // B) search ASCII apostrophe in a text with ASCII apostrophes
345 str = str.replace(u'\u2019', '\'');
347 // search forward
348 aRes = m_xSearch->searchForward( str, startPos, endPos );
349 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
350 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
351 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
353 // search backwards
354 aRes = m_xSearch->searchBackward( str, endPos, startPos );
355 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
356 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
357 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
359 // C) search typographic apostrophe in a text with ASCII apostrophes (no result)
360 aOptions.searchString = u"\u2019"_ustr;
361 m_xSearch->setOptions( aOptions );
363 aRes = m_xSearch->searchForward( str, startPos, endPos );
364 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
366 aRes = m_xSearch->searchBackward( str, endPos, startPos );
367 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
369 // D) search typographic apostrophe in a text with typographic apostrophes
370 str = str.replace('\'', u'\u2019');
372 // search forward
373 aRes = m_xSearch->searchForward( str, startPos, endPos );
374 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
375 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
376 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
378 // search backwards
379 aRes = m_xSearch->searchBackward( str, endPos, startPos );
380 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
381 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
382 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
384 // E) search mixed apostrophes in a text with mixed apostrophes:
385 aOptions.searchString = u"'\u2019"_ustr;
386 m_xSearch->setOptions( aOptions );
387 str = u"test: \u2019'"_ustr;
389 // search forward
390 aRes = m_xSearch->searchForward( str, startPos, str.getLength());
391 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
393 // search backwards
394 aRes = m_xSearch->searchBackward( str, str.getLength(), startPos );
395 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
397 // F) search mixed apostrophes in a text with ASCII apostrophes:
398 str = u"test: ''"_ustr;
400 // search forward
401 aRes = m_xSearch->searchForward( str, startPos, str.getLength());
402 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
404 // search backwards
405 aRes = m_xSearch->searchBackward( str, str.getLength(), startPos );
406 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
409 void TestTextSearch::testQuotationMarkSearch()
411 // A) find typographic quotation marks also by using ASCII ones
412 OUString str( u"“x”, „y‟, ‘z’, ‚a‛"_ustr );
413 sal_Int32 startPos = 0, endPos = str.getLength();
415 // set options
416 util::SearchOptions aOptions;
417 aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
418 aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
419 aOptions.searchString = "\"x\"";
420 aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE
421 | TransliterationFlags::IGNORE_WIDTH);
422 m_xSearch->setOptions( aOptions );
424 util::SearchResult aRes;
426 // search forward
427 aRes = m_xSearch->searchForward( str, startPos, endPos );
428 // This was 0.
429 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
430 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(0), aRes.startOffset[0] );
431 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
433 // search backwards
434 aRes = m_xSearch->searchBackward( str, endPos, startPos );
435 // This was 0.
436 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
437 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
438 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(0), aRes.endOffset[0] );
440 // B)
441 aOptions.searchString = "\"y\"";
442 m_xSearch->setOptions( aOptions );
444 // search forward
445 aRes = m_xSearch->searchForward( str, startPos, endPos );
446 // This was 0.
447 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
448 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(5), aRes.startOffset[0] );
449 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(8), aRes.endOffset[0] );
451 // search backwards
452 aRes = m_xSearch->searchBackward( str, endPos, startPos );
453 // This was 0.
454 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
455 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(8), aRes.startOffset[0] );
456 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(5), aRes.endOffset[0] );
458 // C)
459 aOptions.searchString = "'z'";
460 m_xSearch->setOptions( aOptions );
462 // search forward
463 aRes = m_xSearch->searchForward( str, startPos, endPos );
464 // This was 0.
465 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
466 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(10), aRes.startOffset[0] );
467 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(13), aRes.endOffset[0] );
469 // search backwards
470 aRes = m_xSearch->searchBackward( str, endPos, startPos );
471 // This was 0.
472 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
473 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(13), aRes.startOffset[0] );
474 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(10), aRes.endOffset[0] );
476 // D)
477 aOptions.searchString = "'a'";
478 m_xSearch->setOptions( aOptions );
480 // search forward
481 aRes = m_xSearch->searchForward( str, startPos, endPos );
482 // This was 0.
483 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
484 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(15), aRes.startOffset[0] );
485 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(18), aRes.endOffset[0] );
487 // search backwards
488 aRes = m_xSearch->searchBackward( str, endPos, startPos );
489 // This was 0.
490 CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
491 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(18), aRes.startOffset[0] );
492 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(15), aRes.endOffset[0] );
495 void TestTextSearch::testTdf138410()
497 OUString str(u"\u0643\u064f\u062a\u064f\u0628 \u0643\u062a\u0628"_ustr);
498 sal_Int32 startPos = 0, endPos = str.getLength();
500 util::SearchOptions aOptions;
501 aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
503 util::SearchResult aRes;
505 // A) base alone
506 // The search string will be found whether it is followed by a mark in the
507 // text or not, and whether IGNORE_DIACRITICS_CTL is set or not.
509 // set options
510 aOptions.searchString = u"\u0643"_ustr;
511 aOptions.transliterateFlags = 0;
512 m_xSearch->setOptions(aOptions);
514 // search forward
515 aRes = m_xSearch->searchForward(str, startPos, endPos);
516 CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
517 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
518 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]);
520 // search backwards
521 aRes = m_xSearch->searchBackward(str, endPos, startPos);
522 CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
523 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]);
524 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]);
526 // check with transliteration
527 aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL);
528 m_xSearch->setOptions(aOptions);
530 // search forward
531 aRes = m_xSearch->searchForward(str, startPos, endPos);
532 CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
533 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
534 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]);
536 // search backwards
537 aRes = m_xSearch->searchBackward(str, endPos, startPos);
538 CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
539 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]);
540 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]);
542 // b) base+mark
543 // The search string will be found when followed by a mark in the text, or
544 // when IGNORE_DIACRITICS_CTL is set whether it is followed by a mark or
545 // not.
547 // set options
548 aOptions.searchString = u"\u0643\u064f"_ustr;
549 aOptions.transliterateFlags = 0;
550 m_xSearch->setOptions(aOptions);
552 // search forward
553 aRes = m_xSearch->searchForward(str, startPos, endPos);
554 CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
555 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
556 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.endOffset[0]);
558 // search backwards
559 aRes = m_xSearch->searchBackward(str, endPos, startPos);
560 CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
561 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.startOffset[0]);
562 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.endOffset[0]);
564 // check with transliteration
565 aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL);
566 m_xSearch->setOptions(aOptions);
568 // search forward
569 aRes = m_xSearch->searchForward(str, startPos, endPos);
570 CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
571 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
572 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]);
574 // search backwards
575 aRes = m_xSearch->searchBackward(str, endPos, startPos);
576 CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
577 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]);
578 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]);
580 // b) mark alone
581 // The search string will be found only when IGNORE_DIACRITICS_CTL is not
582 // set.
584 // set options
585 aOptions.searchString = u"\u064f"_ustr;
586 aOptions.transliterateFlags = 0;
587 m_xSearch->setOptions(aOptions);
589 // search forward
590 aRes = m_xSearch->searchForward(str, startPos, endPos);
591 CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
592 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.startOffset[0]);
593 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.endOffset[0]);
595 // search backwards
596 aRes = m_xSearch->searchBackward(str, endPos, startPos);
597 CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
598 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(4), aRes.startOffset[0]);
599 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), aRes.endOffset[0]);
601 // with ignore marks the mark will not be found
602 aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL);
603 m_xSearch->setOptions(aOptions);
605 // search forward
606 aRes = m_xSearch->searchForward(str, startPos, endPos);
607 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
609 // search backwards
610 aRes = m_xSearch->searchBackward(str, endPos, startPos);
611 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
614 void TestTextSearch::setUp()
616 BootstrapFixtureBase::setUp();
617 m_xSearch.set(m_xSFactory->createInstance(u"com.sun.star.util.TextSearch"_ustr), uno::UNO_QUERY_THROW);
618 m_xSearch2.set(m_xSFactory->createInstance(u"com.sun.star.util.TextSearch2"_ustr), uno::UNO_QUERY_THROW);
621 void TestTextSearch::tearDown()
623 m_xSearch.clear();
624 m_xSearch2.clear();
625 BootstrapFixtureBase::tearDown();
// Registers the TestTextSearch suite declared above with CppUnit.
628 CPPUNIT_TEST_SUITE_REGISTRATION(TestTextSearch);
// NOTE(review): presumably emits the entry point that lets the test runner
// load this file as a CppUnit plug-in; confirm against the CppUnit docs.
630 CPPUNIT_PLUGIN_IMPLEMENT();
632 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */