1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <com/sun/star/util/SearchFlags.hpp>
21 #include <com/sun/star/util/SearchOptions.hpp>
22 #include <com/sun/star/util/SearchAlgorithms2.hpp>
23 #include <com/sun/star/util/XTextSearch2.hpp>
24 #include <unotest/bootstrapfixturebase.hxx>
25 #include <i18nutil/transliteration.hxx>
27 #include <unicode/regex.h>
29 using namespace ::com::sun::star
;
31 class TestTextSearch
: public test::BootstrapFixtureBase
34 virtual void setUp() override
;
35 virtual void tearDown() override
;
39 void testWildcardSearch();
40 void testApostropheSearch();
41 void testQuotationMarkSearch();
44 CPPUNIT_TEST_SUITE(TestTextSearch
);
45 CPPUNIT_TEST(testICU
);
46 CPPUNIT_TEST(testSearches
);
47 CPPUNIT_TEST(testWildcardSearch
);
48 CPPUNIT_TEST(testApostropheSearch
);
49 CPPUNIT_TEST(testQuotationMarkSearch
);
50 CPPUNIT_TEST(testTdf138410
);
51 CPPUNIT_TEST_SUITE_END();
53 uno::Reference
<util::XTextSearch
> m_xSearch
;
54 uno::Reference
<util::XTextSearch2
> m_xSearch2
;
57 // Sanity check our ICU first ...
58 void TestTextSearch::testICU()
60 UErrorCode nErr
= U_ZERO_ERROR
;
61 sal_uInt32 nSearchFlags
= UREGEX_UWORD
| UREGEX_CASE_INSENSITIVE
;
63 OUString
aString( u
"abcdefgh"_ustr
);
64 OUString
aPattern( u
"e"_ustr
);
65 icu::UnicodeString
aSearchPat( reinterpret_cast<const UChar
*>(aPattern
.getStr()), aPattern
.getLength() );
67 std::unique_ptr
<icu::RegexMatcher
> pRegexMatcher(new icu::RegexMatcher( aSearchPat
, nSearchFlags
, nErr
));
69 icu::UnicodeString
aSource( reinterpret_cast<const UChar
*>(aString
.getStr()), aString
.getLength() );
70 pRegexMatcher
->reset( aSource
);
72 CPPUNIT_ASSERT( pRegexMatcher
->find( 0, nErr
) );
73 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR
, nErr
);
74 CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(4), pRegexMatcher
->start( nErr
) );
75 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR
, nErr
);
76 CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(5), pRegexMatcher
->end( nErr
) );
77 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR
, nErr
);
79 OUString
aString2( u
"acababaabcababadcdaa"_ustr
);
80 OUString
aPattern2( u
"a"_ustr
);
82 icu::UnicodeString
aSearchPat2( reinterpret_cast<const UChar
*>(aPattern2
.getStr()), aPattern2
.getLength() );
83 pRegexMatcher
.reset(new icu::RegexMatcher( aSearchPat2
, nSearchFlags
, nErr
));
85 icu::UnicodeString
aSource2( reinterpret_cast<const UChar
*>(aString2
.getStr()), aString2
.getLength() );
86 pRegexMatcher
->reset( aSource2
);
88 CPPUNIT_ASSERT( pRegexMatcher
->find( 0, nErr
) );
89 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR
, nErr
);
90 CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(0), pRegexMatcher
->start( nErr
) );
91 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR
, nErr
);
92 CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(1), pRegexMatcher
->end( nErr
) );
93 CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR
, nErr
);
96 void TestTextSearch::testSearches()
98 OUString
str( u
"acababaabcababadcdaa"_ustr
);
99 sal_Int32 startPos
= 2, endPos
= 20 ;
100 sal_Int32
const fStartRes
= 10, fEndRes
= 18 ;
101 sal_Int32
const bStartRes
= 18, bEndRes
= 10 ;
104 util::SearchOptions aOptions
;
105 aOptions
.algorithmType
= util::SearchAlgorithms_REGEXP
;
106 aOptions
.searchFlag
= util::SearchFlags::ALL_IGNORE_CASE
;
107 aOptions
.searchString
= "(ab)*a(c|d)+";
108 m_xSearch
->setOptions( aOptions
);
110 util::SearchResult aRes
;
113 aRes
= m_xSearch
->searchForward( str
, startPos
, endPos
);
114 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
115 CPPUNIT_ASSERT_EQUAL( fStartRes
, aRes
.startOffset
[0] );
116 CPPUNIT_ASSERT_EQUAL( fEndRes
, aRes
.endOffset
[0] );
119 aRes
= m_xSearch
->searchBackward( str
, endPos
, startPos
);
120 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
121 CPPUNIT_ASSERT_EQUAL( bStartRes
, aRes
.startOffset
[0] );
122 CPPUNIT_ASSERT_EQUAL( bEndRes
, aRes
.endOffset
[0] );
124 aOptions
.transliterateFlags
= static_cast<int>(TransliterationFlags::IGNORE_CASE
125 | TransliterationFlags::IGNORE_WIDTH
);
126 aOptions
.searchString
= "([^ ]*)[ ]*([^ ]*)";
127 m_xSearch
->setOptions(aOptions
);
128 aRes
= m_xSearch
->searchForward(u
"11 22 33"_ustr
, 2, 7);
129 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(3), aRes
.subRegExpressions
);
130 CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes
.startOffset
[0]);
131 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes
.endOffset
[0]);
132 CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes
.startOffset
[1]);
133 CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes
.endOffset
[1]);
134 CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes
.startOffset
[2]);
135 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes
.endOffset
[2]);
138 void TestTextSearch::testWildcardSearch()
140 util::SearchOptions2 aOptions
;
142 util::SearchResult aRes
;
144 aOptions
.AlgorithmType2
= util::SearchAlgorithms2::WILDCARD
;
145 aOptions
.WildcardEscapeCharacter
= '~';
146 // aOptions.searchFlag = ::css::util::SearchFlags::WILD_MATCH_SELECTION;
147 // is not set, so substring match is allowed.
148 aOptions
.transliterateFlags
= sal_Int32(::css::i18n::TransliterationModules::TransliterationModules_IGNORE_CASE
);
151 aOptions
.searchString
= "a";
152 m_xSearch2
->setOptions2( aOptions
);
153 // match first "a", [0,1)
154 aRes
= m_xSearch2
->searchForward( aText
, 0, aText
.getLength());
155 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
156 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes
.startOffset
[0]);
157 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes
.endOffset
[0]);
158 // match last "a", (5,4]
159 aRes
= m_xSearch2
->searchBackward( aText
, aText
.getLength(), 0);
160 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
161 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes
.startOffset
[0]);
162 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes
.endOffset
[0]);
164 aOptions
.searchString
= "a?";
165 m_xSearch2
->setOptions2( aOptions
);
167 aRes
= m_xSearch2
->searchForward( aText
, 0, aText
.getLength());
168 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
169 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes
.startOffset
[0]);
170 CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes
.endOffset
[0]);
172 aRes
= m_xSearch2
->searchBackward( aText
, aText
.getLength(), 0);
173 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
174 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes
.startOffset
[0]);
175 CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes
.endOffset
[0]);
177 aOptions
.searchString
= "a*c";
178 m_xSearch2
->setOptions2( aOptions
);
179 // match "abac", [0,4) XXX NOTE: first match forward
180 aRes
= m_xSearch2
->searchForward( aText
, 0, aText
.getLength());
181 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
182 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes
.startOffset
[0]);
183 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes
.endOffset
[0]);
184 // match "ac", (4,2] XXX NOTE: first match backward, not greedy
185 aRes
= m_xSearch2
->searchBackward( aText
, aText
.getLength(), 0);
186 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
187 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes
.startOffset
[0]);
188 CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes
.endOffset
[0]);
190 aOptions
.searchString
= "b*a";
191 m_xSearch2
->setOptions2( aOptions
);
192 // match "ba", [1,3) XXX NOTE: first match forward, not greedy
193 aRes
= m_xSearch2
->searchForward( aText
, 0, aText
.getLength());
194 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
195 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes
.startOffset
[0]);
196 CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes
.endOffset
[0]);
197 // match "baca", (5,1] XXX NOTE: first match backward
198 aRes
= m_xSearch2
->searchBackward( aText
, aText
.getLength(), 0);
199 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
200 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes
.startOffset
[0]);
201 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes
.endOffset
[0]);
205 aOptions
.searchString
= "?~??";
206 m_xSearch2
->setOptions2( aOptions
);
207 // match "b?c", [1,4)
208 aRes
= m_xSearch2
->searchForward( aText
, 0, aText
.getLength());
209 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
210 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes
.startOffset
[0]);
211 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes
.endOffset
[0]);
212 // match "b?c", (4,1]
213 aRes
= m_xSearch2
->searchBackward( aText
, aText
.getLength(), 0);
214 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
215 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes
.startOffset
[0]);
216 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes
.endOffset
[0]);
220 aOptions
.searchString
= "?~*?";
221 m_xSearch2
->setOptions2( aOptions
);
222 // match "b?c", [1,4)
223 aRes
= m_xSearch2
->searchForward( aText
, 0, aText
.getLength());
224 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
225 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes
.startOffset
[0]);
226 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes
.endOffset
[0]);
227 // match "b?c", (4,1]
228 aRes
= m_xSearch2
->searchBackward( aText
, aText
.getLength(), 0);
229 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
230 CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes
.startOffset
[0]);
231 CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes
.endOffset
[0]);
233 aOptions
.searchString
= "ca?";
234 m_xSearch2
->setOptions2( aOptions
);
236 aRes
= m_xSearch2
->searchForward( aText
, 0, aText
.getLength());
237 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(0), aRes
.subRegExpressions
);
239 aRes
= m_xSearch2
->searchBackward( aText
, aText
.getLength(), 0);
240 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(0), aRes
.subRegExpressions
);
242 aOptions
.searchString
= "ca*";
243 m_xSearch2
->setOptions2( aOptions
);
245 aRes
= m_xSearch2
->searchForward( aText
, 0, aText
.getLength());
246 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
247 CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes
.startOffset
[0]);
248 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes
.endOffset
[0]);
250 aRes
= m_xSearch2
->searchBackward( aText
, aText
.getLength(), 0);
251 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
252 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes
.startOffset
[0]);
253 CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes
.endOffset
[0]);
255 aOptions
.searchString
= "*ca*";
256 m_xSearch2
->setOptions2( aOptions
);
257 // match "abaca", [0,5)
258 aRes
= m_xSearch2
->searchForward( aText
, 0, aText
.getLength());
259 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
260 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes
.startOffset
[0]);
261 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes
.endOffset
[0]);
262 // match "abaca", (5,0]
263 aRes
= m_xSearch2
->searchBackward( aText
, aText
.getLength(), 0);
264 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
265 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes
.startOffset
[0]);
266 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes
.endOffset
[0]);
269 aOptions
.searchString
= "*2?";
270 m_xSearch2
->setOptions2( aOptions
);
271 // match first "123", [0,3)
272 aRes
= m_xSearch2
->searchForward( aText
, 0, aText
.getLength());
273 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
274 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes
.startOffset
[0]);
275 CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes
.endOffset
[0]);
276 // match "123123", (6,0] Yes this looks odd, but it is as searching "?2*" forward.
277 aRes
= m_xSearch2
->searchBackward( aText
, aText
.getLength(), 0);
278 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
279 CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes
.startOffset
[0]);
280 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes
.endOffset
[0]);
282 aOptions
.searchFlag
|= util::SearchFlags::WILD_MATCH_SELECTION
;
283 m_xSearch2
->setOptions2( aOptions
);
284 // match "123123", [0,6) with greedy '*'
285 aRes
= m_xSearch2
->searchForward( aText
, 0, aText
.getLength());
286 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
287 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes
.startOffset
[0]);
288 CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes
.endOffset
[0]);
289 // match "123123", (6,0]
290 aRes
= m_xSearch2
->searchBackward( aText
, aText
.getLength(), 0);
291 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.subRegExpressions
);
292 CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes
.startOffset
[0]);
293 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes
.endOffset
[0]);
296 void TestTextSearch::testApostropheSearch()
298 // A) find typographic apostrophes also by using ASCII apostrophe in searchString
299 OUString
str( u
"It\u2019s an apostrophe."_ustr
);
300 sal_Int32 startPos
= 0, endPos
= str
.getLength();
303 util::SearchOptions aOptions
;
304 aOptions
.algorithmType
= util::SearchAlgorithms_ABSOLUTE
;
305 aOptions
.searchFlag
= util::SearchFlags::ALL_IGNORE_CASE
;
306 aOptions
.searchString
= "'";
307 m_xSearch
->setOptions( aOptions
);
309 util::SearchResult aRes
;
312 aRes
= m_xSearch
->searchForward( str
, startPos
, endPos
);
314 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
315 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(2), aRes
.startOffset
[0] );
316 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(3), aRes
.endOffset
[0] );
319 aRes
= m_xSearch
->searchBackward( str
, endPos
, startPos
);
321 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
322 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(3), aRes
.startOffset
[0] );
323 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(2), aRes
.endOffset
[0] );
325 // check with transliteration
326 aOptions
.transliterateFlags
= static_cast<int>(TransliterationFlags::IGNORE_CASE
327 | TransliterationFlags::IGNORE_WIDTH
);
328 m_xSearch
->setOptions(aOptions
);
331 aRes
= m_xSearch
->searchForward( str
, startPos
, endPos
);
333 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
334 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(2), aRes
.startOffset
[0] );
335 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(3), aRes
.endOffset
[0] );
338 aRes
= m_xSearch
->searchBackward( str
, endPos
, startPos
);
340 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
341 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(3), aRes
.startOffset
[0] );
342 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(2), aRes
.endOffset
[0] );
344 // B) search ASCII apostrophe in a text with ASCII apostrophes
345 str
= str
.replace(u
'\u2019', '\'');
348 aRes
= m_xSearch
->searchForward( str
, startPos
, endPos
);
349 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
350 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(2), aRes
.startOffset
[0] );
351 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(3), aRes
.endOffset
[0] );
354 aRes
= m_xSearch
->searchBackward( str
, endPos
, startPos
);
355 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
356 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(3), aRes
.startOffset
[0] );
357 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(2), aRes
.endOffset
[0] );
359 // C) search typographic apostrophe in a text with ASCII apostrophes (no result)
360 aOptions
.searchString
= u
"\u2019"_ustr
;
361 m_xSearch
->setOptions( aOptions
);
363 aRes
= m_xSearch
->searchForward( str
, startPos
, endPos
);
364 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(0), aRes
.subRegExpressions
);
366 aRes
= m_xSearch
->searchBackward( str
, endPos
, startPos
);
367 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(0), aRes
.subRegExpressions
);
369 // D) search typographic apostrophe in a text with typographic apostrophes
370 str
= str
.replace('\'', u
'\u2019');
373 aRes
= m_xSearch
->searchForward( str
, startPos
, endPos
);
374 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
375 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(2), aRes
.startOffset
[0] );
376 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(3), aRes
.endOffset
[0] );
379 aRes
= m_xSearch
->searchBackward( str
, endPos
, startPos
);
380 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
381 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(3), aRes
.startOffset
[0] );
382 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(2), aRes
.endOffset
[0] );
384 // E) search mixed apostrophes in a text with mixed apostrophes:
385 aOptions
.searchString
= u
"'\u2019"_ustr
;
386 m_xSearch
->setOptions( aOptions
);
387 str
= u
"test: \u2019'"_ustr
;
390 aRes
= m_xSearch
->searchForward( str
, startPos
, str
.getLength());
391 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
394 aRes
= m_xSearch
->searchBackward( str
, str
.getLength(), startPos
);
395 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
397 // F) search mixed apostrophes in a text with ASCII apostrophes:
398 str
= u
"test: ''"_ustr
;
401 aRes
= m_xSearch
->searchForward( str
, startPos
, str
.getLength());
402 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
405 aRes
= m_xSearch
->searchBackward( str
, str
.getLength(), startPos
);
406 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
409 void TestTextSearch::testQuotationMarkSearch()
411 // A) find typographic quotation marks also by using ASCII ones
412 OUString
str( u
"“x”, „y‟, ‘z’, ‚a‛"_ustr
);
413 sal_Int32 startPos
= 0, endPos
= str
.getLength();
416 util::SearchOptions aOptions
;
417 aOptions
.algorithmType
= util::SearchAlgorithms_ABSOLUTE
;
418 aOptions
.searchFlag
= util::SearchFlags::ALL_IGNORE_CASE
;
419 aOptions
.searchString
= "\"x\"";
420 aOptions
.transliterateFlags
= static_cast<int>(TransliterationFlags::IGNORE_CASE
421 | TransliterationFlags::IGNORE_WIDTH
);
422 m_xSearch
->setOptions( aOptions
);
424 util::SearchResult aRes
;
427 aRes
= m_xSearch
->searchForward( str
, startPos
, endPos
);
429 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
430 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(0), aRes
.startOffset
[0] );
431 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(3), aRes
.endOffset
[0] );
434 aRes
= m_xSearch
->searchBackward( str
, endPos
, startPos
);
436 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
437 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(3), aRes
.startOffset
[0] );
438 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(0), aRes
.endOffset
[0] );
441 aOptions
.searchString
= "\"y\"";
442 m_xSearch
->setOptions( aOptions
);
445 aRes
= m_xSearch
->searchForward( str
, startPos
, endPos
);
447 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
448 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(5), aRes
.startOffset
[0] );
449 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(8), aRes
.endOffset
[0] );
452 aRes
= m_xSearch
->searchBackward( str
, endPos
, startPos
);
454 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
455 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(8), aRes
.startOffset
[0] );
456 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(5), aRes
.endOffset
[0] );
459 aOptions
.searchString
= "'z'";
460 m_xSearch
->setOptions( aOptions
);
463 aRes
= m_xSearch
->searchForward( str
, startPos
, endPos
);
465 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
466 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(10), aRes
.startOffset
[0] );
467 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(13), aRes
.endOffset
[0] );
470 aRes
= m_xSearch
->searchBackward( str
, endPos
, startPos
);
472 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
473 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(13), aRes
.startOffset
[0] );
474 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(10), aRes
.endOffset
[0] );
477 aOptions
.searchString
= "'a'";
478 m_xSearch
->setOptions( aOptions
);
481 aRes
= m_xSearch
->searchForward( str
, startPos
, endPos
);
483 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
484 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(15), aRes
.startOffset
[0] );
485 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(18), aRes
.endOffset
[0] );
488 aRes
= m_xSearch
->searchBackward( str
, endPos
, startPos
);
490 CPPUNIT_ASSERT( aRes
.subRegExpressions
> 0 );
491 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(18), aRes
.startOffset
[0] );
492 CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32
>(15), aRes
.endOffset
[0] );
495 void TestTextSearch::testTdf138410()
497 OUString
str(u
"\u0643\u064f\u062a\u064f\u0628 \u0643\u062a\u0628"_ustr
);
498 sal_Int32 startPos
= 0, endPos
= str
.getLength();
500 util::SearchOptions aOptions
;
501 aOptions
.algorithmType
= util::SearchAlgorithms_ABSOLUTE
;
503 util::SearchResult aRes
;
506 // The search string will be found whether it is followed by a mark in the
507 // text or not, and whether IGNORE_DIACRITICS_CTL is set or not.
510 aOptions
.searchString
= u
"\u0643"_ustr
;
511 aOptions
.transliterateFlags
= 0;
512 m_xSearch
->setOptions(aOptions
);
515 aRes
= m_xSearch
->searchForward(str
, startPos
, endPos
);
516 CPPUNIT_ASSERT(aRes
.subRegExpressions
> 0);
517 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(0), aRes
.startOffset
[0]);
518 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.endOffset
[0]);
521 aRes
= m_xSearch
->searchBackward(str
, endPos
, startPos
);
522 CPPUNIT_ASSERT(aRes
.subRegExpressions
> 0);
523 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(7), aRes
.startOffset
[0]);
524 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(6), aRes
.endOffset
[0]);
526 // check with transliteration
527 aOptions
.transliterateFlags
= static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL
);
528 m_xSearch
->setOptions(aOptions
);
531 aRes
= m_xSearch
->searchForward(str
, startPos
, endPos
);
532 CPPUNIT_ASSERT(aRes
.subRegExpressions
> 0);
533 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(0), aRes
.startOffset
[0]);
534 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.endOffset
[0]);
537 aRes
= m_xSearch
->searchBackward(str
, endPos
, startPos
);
538 CPPUNIT_ASSERT(aRes
.subRegExpressions
> 0);
539 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(7), aRes
.startOffset
[0]);
540 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(6), aRes
.endOffset
[0]);
543 // The search string will be found when followed by a mark in the text, or
544 // when IGNORE_DIACRITICS_CTL is set whether it is followed by a mark or
548 aOptions
.searchString
= u
"\u0643\u064f"_ustr
;
549 aOptions
.transliterateFlags
= 0;
550 m_xSearch
->setOptions(aOptions
);
553 aRes
= m_xSearch
->searchForward(str
, startPos
, endPos
);
554 CPPUNIT_ASSERT(aRes
.subRegExpressions
> 0);
555 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(0), aRes
.startOffset
[0]);
556 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(2), aRes
.endOffset
[0]);
559 aRes
= m_xSearch
->searchBackward(str
, endPos
, startPos
);
560 CPPUNIT_ASSERT(aRes
.subRegExpressions
> 0);
561 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(2), aRes
.startOffset
[0]);
562 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(0), aRes
.endOffset
[0]);
564 // check with transliteration
565 aOptions
.transliterateFlags
= static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL
);
566 m_xSearch
->setOptions(aOptions
);
569 aRes
= m_xSearch
->searchForward(str
, startPos
, endPos
);
570 CPPUNIT_ASSERT(aRes
.subRegExpressions
> 0);
571 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(0), aRes
.startOffset
[0]);
572 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.endOffset
[0]);
575 aRes
= m_xSearch
->searchBackward(str
, endPos
, startPos
);
576 CPPUNIT_ASSERT(aRes
.subRegExpressions
> 0);
577 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(7), aRes
.startOffset
[0]);
578 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(6), aRes
.endOffset
[0]);
581 // The search string will be found only when IGNORE_DIACRITICS_CTL is not
585 aOptions
.searchString
= u
"\u064f"_ustr
;
586 aOptions
.transliterateFlags
= 0;
587 m_xSearch
->setOptions(aOptions
);
590 aRes
= m_xSearch
->searchForward(str
, startPos
, endPos
);
591 CPPUNIT_ASSERT(aRes
.subRegExpressions
> 0);
592 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(1), aRes
.startOffset
[0]);
593 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(2), aRes
.endOffset
[0]);
596 aRes
= m_xSearch
->searchBackward(str
, endPos
, startPos
);
597 CPPUNIT_ASSERT(aRes
.subRegExpressions
> 0);
598 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(4), aRes
.startOffset
[0]);
599 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(3), aRes
.endOffset
[0]);
601 // with ignore marks the mark will not be found
602 aOptions
.transliterateFlags
= static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL
);
603 m_xSearch
->setOptions(aOptions
);
606 aRes
= m_xSearch
->searchForward(str
, startPos
, endPos
);
607 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(0), aRes
.subRegExpressions
);
610 aRes
= m_xSearch
->searchBackward(str
, endPos
, startPos
);
611 CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32
>(0), aRes
.subRegExpressions
);
614 void TestTextSearch::setUp()
616 BootstrapFixtureBase::setUp();
617 m_xSearch
.set(m_xSFactory
->createInstance(u
"com.sun.star.util.TextSearch"_ustr
), uno::UNO_QUERY_THROW
);
618 m_xSearch2
.set(m_xSFactory
->createInstance(u
"com.sun.star.util.TextSearch2"_ustr
), uno::UNO_QUERY_THROW
);
621 void TestTextSearch::tearDown()
625 BootstrapFixtureBase::tearDown();
628 CPPUNIT_TEST_SUITE_REGISTRATION(TestTextSearch
);
630 CPPUNIT_PLUGIN_IMPLEMENT();
632 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */