1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include <com/sun/star/i18n/XBreakIterator.hpp>
11 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
12 #include <com/sun/star/i18n/ScriptType.hpp>
13 #include <com/sun/star/i18n/WordType.hpp>
14 #include <unotest/bootstrapfixturebase.hxx>
16 #include <unicode/uvernum.h>
18 #include <rtl/strbuf.hxx>
24 using namespace ::com::sun::star
;
// CppUnit fixture for the i18n break iterator. It declares the test methods,
// registers them in the CppUnit suite and holds the XBreakIterator reference
// (m_xBreak) that the tests call.
// NOTE(review): this listing does not show braces, access specifiers or the
// declarations of several registered tests - confirm against the full file.
26 class TestBreakIterator
: public test::BootstrapFixtureBase
29 virtual void setUp() override
;
30 virtual void tearDown() override
;
32 void testLineBreaking();
33 void testWordBoundaries();
34 void testGraphemeIteration();
// The declarations that follow are guarded by an ICU major version check.
38 #if (U_ICU_VERSION_MAJOR_NUM > 51)
41 void testNorthernThai();
// Suite registration: each CPPUNIT_TEST entry names one test method.
48 CPPUNIT_TEST_SUITE(TestBreakIterator
);
49 CPPUNIT_TEST(testLineBreaking
);
50 CPPUNIT_TEST(testWordBoundaries
);
51 CPPUNIT_TEST(testGraphemeIteration
);
52 CPPUNIT_TEST(testWeak
);
53 CPPUNIT_TEST(testAsian
);
54 CPPUNIT_TEST(testThai
);
// Entries below this check are only registered for ICU major versions above 51
// (the matching end of the conditional is not visible in this listing).
55 #if (U_ICU_VERSION_MAJOR_NUM > 51)
56 CPPUNIT_TEST(testLao
);
58 CPPUNIT_TEST(testKhmer
);
59 CPPUNIT_TEST(testNorthernThai
);
62 CPPUNIT_TEST(testJapanese
);
63 CPPUNIT_TEST(testChinese
);
64 CPPUNIT_TEST_SUITE_END();
// Break iterator under test; assigned in setUp().
67 uno::Reference
<i18n::XBreakIterator
> m_xBreak
;
// Shared Japanese checks, run by testJapanese() against the iterator passed in.
68 void doTestJapanese(uno::Reference
< i18n::XBreakIterator
> const &xBreak
);
// Exercises XBreakIterator::getLineBreak on a series of regression inputs and
// checks the breakIndex reported for each one.
// NOTE(review): braces and the declaration of aLocale are not visible in this
// listing - confirm against the complete file.
71 void TestBreakIterator::testLineBreaking()
73 i18n::LineBreakHyphenationOptions aHyphOptions
;
74 i18n::LineBreakUserOptions aUserOptions
;
77 //See https://bugs.libreoffice.org/show_bug.cgi?id=31271
79 OUString
aTest("(some text here)");
81 aLocale
.Language
= "en";
82 aLocale
.Country
= "US";
85 //Here we want the line break to leave text here) on the next line
86 i18n::LineBreakResults aResult
= m_xBreak
->getLineBreak(aTest
, strlen("(some tex"), aLocale
, 0, aHyphOptions
, aUserOptions
);
87 CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the start of the word", static_cast<sal_Int32
>(6), aResult
.breakIndex
);
91 //Here we want the line break to leave "here)" on the next line
92 i18n::LineBreakResults aResult
= m_xBreak
->getLineBreak(aTest
, strlen("(some text here"), aLocale
, 0, aHyphOptions
, aUserOptions
);
93 CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the start of the word", static_cast<sal_Int32
>(11), aResult
.breakIndex
);
97 //See https://bugs.libreoffice.org/show_bug.cgi?id=49849
// Five Hebrew code points form one word; the test string is word + space + word.
99 const sal_Unicode HEBREW1
[] = { 0x05DE, 0x05D9, 0x05DC, 0x05D9, 0x5DD };
100 OUString
aWord(HEBREW1
, SAL_N_ELEMENTS(HEBREW1
));
101 OUString
aTest(aWord
+ " " + aWord
);
103 aLocale
.Language
= "he";
104 aLocale
.Country
= "IL";
107 //Here we want the line break to happen at the whitespace
// The expected index is one past the space, i.e. the start of the second word.
108 i18n::LineBreakResults aResult
= m_xBreak
->getLineBreak(aTest
, aTest
.getLength()-1, aLocale
, 0, aHyphOptions
, aUserOptions
);
109 CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the start of the word", aWord
.getLength()+1, aResult
.breakIndex
);
113 //See https://bz.apache.org/ooo/show_bug.cgi?id=17155
115 OUString
const aTest("foo /bar/baz");
117 aLocale
.Language
= "en";
118 aLocale
.Country
= "US";
121 //Here we want the line break to leave /bar/ba clumped together on the next line
122 i18n::LineBreakResults aResult
= m_xBreak
->getLineBreak(aTest
, strlen("foo /bar/ba"), aLocale
, 0,
123 aHyphOptions
, aUserOptions
);
124 CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the first slash", static_cast<sal_Int32
>(4), aResult
.breakIndex
);
128 //See https://bz.apache.org/ooo/show_bug.cgi?id=19716
130 OUString
aTest("aaa]aaa");
132 aLocale
.Language
= "en";
133 aLocale
.Country
= "US";
136 //Here we want the line break to move the whole lot to the next line
137 i18n::LineBreakResults aResult
= m_xBreak
->getLineBreak(aTest
, aTest
.getLength()-2, aLocale
, 0,
138 aHyphOptions
, aUserOptions
);
139 CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the start of the line, not at ]", static_cast<sal_Int32
>(0), aResult
.breakIndex
);
143 //this is an example sequence from tdf92993-1.docx caught by the load crashtesting
// UTF-16 surrogate pairs mixed with U+2668 and U+FE0F.
145 const sal_Unicode WEIRD1
[] = { 0xd83c, 0xdf56, 0xd83c, 0xdf57, 0xd83c, 0xdf46,
146 0xd83c, 0xdf64, 0x2668, 0xfe0f, 0xd83c, 0xdfc6};
148 OUString
aTest(WEIRD1
, SAL_N_ELEMENTS(WEIRD1
));
150 aLocale
.Language
= "en";
151 aLocale
.Country
= "US";
154 //This must not assert/crash
// The result is deliberately discarded; only completing the call matters here.
155 (void)m_xBreak
->getLineBreak(aTest
, 0, aLocale
, 0, aHyphOptions
, aUserOptions
);
159 //See https://bugs.documentfoundation.org/show_bug.cgi?id=96197
// Hangul code points with one U+0020 space; the rest of the initializer is
// not visible in this listing.
161 const sal_Unicode HANGUL
[] = { 0xc560, 0xad6D, 0xac00, 0xc758, 0x0020, 0xac00,
163 OUString
aTest(HANGUL
, SAL_N_ELEMENTS(HANGUL
));
165 aLocale
.Language
= "ko";
166 aLocale
.Country
= "KR";
// Index 5 is the position right after the space in the visible part of HANGUL.
169 i18n::LineBreakResults aResult
= m_xBreak
->getLineBreak(aTest
, aTest
.getLength()-2, aLocale
, 0,
170 aHyphOptions
, aUserOptions
);
171 CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break don't split the Korean word!", static_cast<sal_Int32
>(5), aResult
.breakIndex
);
176 //See https://bugs.libreoffice.org/show_bug.cgi?id=49629
// Checks the word-boundary queries of XBreakIterator (isBeginWord, isEndWord,
// getWordBoundary, nextWord, previousWord) against expected positions for a
// series of regression inputs in several locales.
// NOTE(review): braces, do/switch headers and several local declarations
// (for example nPos and i) are not visible in this listing - confirm against
// the complete file before relying on the control flow described here.
177 void TestBreakIterator::testWordBoundaries()
179 lang::Locale aLocale
;
180 aLocale
.Language
= "en";
181 aLocale
.Country
= "US";
183 i18n::Boundary aBounds
;
185 //See https://bz.apache.org/ooo/show_bug.cgi?id=11993
187 OUString
aTest("abcd ef ghi??? KLM");
// Position 4 is the space after abcd: an end of word but not a beginning.
189 CPPUNIT_ASSERT(!m_xBreak
->isBeginWord(aTest
, 4, aLocale
, i18n::WordType::DICTIONARY_WORD
));
190 CPPUNIT_ASSERT(m_xBreak
->isEndWord(aTest
, 4, aLocale
, i18n::WordType::DICTIONARY_WORD
));
191 aBounds
= m_xBreak
->getWordBoundary(aTest
, 4, aLocale
, i18n::WordType::DICTIONARY_WORD
, true);
192 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 4);
// Position 8 is the space after ef: neither a beginning nor an end of word.
194 CPPUNIT_ASSERT(!m_xBreak
->isBeginWord(aTest
, 8, aLocale
, i18n::WordType::DICTIONARY_WORD
));
195 CPPUNIT_ASSERT(!m_xBreak
->isEndWord(aTest
, 8, aLocale
, i18n::WordType::DICTIONARY_WORD
));
// With the last argument true the following word (9..12) is expected.
198 aBounds
= m_xBreak
->getWordBoundary(aTest
, 8, aLocale
, i18n::WordType::DICTIONARY_WORD
, true);
199 CPPUNIT_ASSERT(aBounds
.startPos
== 9 && aBounds
.endPos
== 12);
// With the last argument false the preceding word (5..7) is expected.
202 aBounds
= m_xBreak
->getWordBoundary(aTest
, 8, aLocale
, i18n::WordType::DICTIONARY_WORD
, false);
203 CPPUNIT_ASSERT(aBounds
.startPos
== 5 && aBounds
.endPos
== 7);
205 CPPUNIT_ASSERT(!m_xBreak
->isBeginWord(aTest
, 12, aLocale
, i18n::WordType::DICTIONARY_WORD
));
206 CPPUNIT_ASSERT(m_xBreak
->isEndWord(aTest
, 12, aLocale
, i18n::WordType::DICTIONARY_WORD
));
207 aBounds
= m_xBreak
->getWordBoundary(aTest
, 12, aLocale
, i18n::WordType::DICTIONARY_WORD
, true);
208 CPPUNIT_ASSERT(aBounds
.startPos
== 9 && aBounds
.endPos
== 12);
210 CPPUNIT_ASSERT(m_xBreak
->isBeginWord(aTest
, 16, aLocale
, i18n::WordType::DICTIONARY_WORD
));
211 CPPUNIT_ASSERT(!m_xBreak
->isEndWord(aTest
, 16, aLocale
, i18n::WordType::DICTIONARY_WORD
));
212 aBounds
= m_xBreak
->getWordBoundary(aTest
, 16, aLocale
, i18n::WordType::DICTIONARY_WORD
, true);
213 CPPUNIT_ASSERT(aBounds
.startPos
== 16 && aBounds
.endPos
== 19);
216 //See https://bz.apache.org/ooo/show_bug.cgi?id=21907
218 OUString
aTest("b a?");
// For ANY_WORD, positions 1, 2 and 3 must each be both a word begin and a word end.
220 CPPUNIT_ASSERT(m_xBreak
->isBeginWord(aTest
, 1, aLocale
, i18n::WordType::ANY_WORD
));
221 CPPUNIT_ASSERT(m_xBreak
->isBeginWord(aTest
, 2, aLocale
, i18n::WordType::ANY_WORD
));
222 CPPUNIT_ASSERT(m_xBreak
->isBeginWord(aTest
, 3, aLocale
, i18n::WordType::ANY_WORD
));
224 CPPUNIT_ASSERT(m_xBreak
->isBeginWord(aTest
, 3, aLocale
, i18n::WordType::ANYWORD_IGNOREWHITESPACES
));
226 CPPUNIT_ASSERT(m_xBreak
->isEndWord(aTest
, 1, aLocale
, i18n::WordType::ANY_WORD
));
227 CPPUNIT_ASSERT(m_xBreak
->isEndWord(aTest
, 2, aLocale
, i18n::WordType::ANY_WORD
));
228 CPPUNIT_ASSERT(m_xBreak
->isEndWord(aTest
, 3, aLocale
, i18n::WordType::ANY_WORD
));
230 CPPUNIT_ASSERT(m_xBreak
->isEndWord(aTest
, 3, aLocale
, i18n::WordType::ANYWORD_IGNOREWHITESPACES
));
233 //See https://bz.apache.org/ooo/show_bug.cgi?id=14904
// Text mixing curly quotes (U+201C, U+201D, U+2018, U+2019), an inverted
// question mark (U+00BF) and plain ASCII words.
235 const sal_Unicode TEST
[] =
237 'W', 'o', 'r', 'k', 'i', 'n', 'g', ' ', 0x201C, 'W', 'o', 'r', 'd', 's',
238 ' ', 's', 't', 'a', 'r', 't', 'i', 'n', 'g', ' ', 'w', 'i', 't',
239 'h', ' ', 'q', 'u', 'o', 't', 'e', 's', 0x201D, ' ', 'W', 'o', 'r', 'k',
240 'i', 'n', 'g', ' ', 0x2018, 'B', 'r', 'o', 'k', 'e', 'n', 0x2019, ' ',
241 '?', 'S', 'p', 'a', 'n', 'i', 's', 'h', '?', ' ', 'd', 'o', 'e',
242 's', 'n', 0x2019, 't', ' ', 'w', 'o', 'r', 'k', '.', ' ', 'N', 'o',
243 't', ' ', 'e', 'v', 'e', 'n', ' ' , 0x00BF, 'r', 'e', 'a', 'l', '?', ' ',
244 'S', 'p', 'a', 'n', 'i', 's', 'h'
246 OUString
aTest(TEST
, SAL_N_ELEMENTS(TEST
));
// Each query starts inside a word; the expected bounds exclude the adjacent
// quote or question mark characters.
248 aBounds
= m_xBreak
->getWordBoundary(aTest
, 4, aLocale
, i18n::WordType::DICTIONARY_WORD
, false);
249 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 7);
251 aBounds
= m_xBreak
->getWordBoundary(aTest
, 12, aLocale
, i18n::WordType::DICTIONARY_WORD
, false);
252 CPPUNIT_ASSERT(aBounds
.startPos
== 9 && aBounds
.endPos
== 14);
254 aBounds
= m_xBreak
->getWordBoundary(aTest
, 40, aLocale
, i18n::WordType::DICTIONARY_WORD
, false);
255 CPPUNIT_ASSERT(aBounds
.startPos
== 37 && aBounds
.endPos
== 44);
257 aBounds
= m_xBreak
->getWordBoundary(aTest
, 49, aLocale
, i18n::WordType::DICTIONARY_WORD
, false);
258 CPPUNIT_ASSERT(aBounds
.startPos
== 46 && aBounds
.endPos
== 52);
260 aBounds
= m_xBreak
->getWordBoundary(aTest
, 58, aLocale
, i18n::WordType::DICTIONARY_WORD
, false);
261 CPPUNIT_ASSERT(aBounds
.startPos
== 55 && aBounds
.endPos
== 62);
263 aBounds
= m_xBreak
->getWordBoundary(aTest
, 67, aLocale
, i18n::WordType::DICTIONARY_WORD
, false);
264 CPPUNIT_ASSERT(aBounds
.startPos
== 64 && aBounds
.endPos
== 71);
266 aBounds
= m_xBreak
->getWordBoundary(aTest
, 90, aLocale
, i18n::WordType::DICTIONARY_WORD
, false);
267 CPPUNIT_ASSERT(aBounds
.startPos
== 88 && aBounds
.endPos
== 92);
270 //See https://bugs.libreoffice.org/show_bug.cgi?id=49629
// Characters expected to break a word: placed between two copies of Word, the
// first word must end at index 4 for every word type from ANY_WORD to WORD_COUNT.
271 sal_Unicode aBreakTests
[] = { ' ', 1, 2, 3, 4, 5, 6, 7, 0x91, 0x92, 0x200B, 0xE8FF, 0xF8FF };
272 for (int mode
= i18n::WordType::ANY_WORD
; mode
<= i18n::WordType::WORD_COUNT
; ++mode
)
274 //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
275 for (size_t i
= 0; i
< SAL_N_ELEMENTS(aBreakTests
); ++i
)
277 #if (U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM <= 2)
278 //Note the breakiterator test is known to fail on older icu
279 //versions (4.2.1) for the 200B (ZWSP) Zero Width Space testcase.
280 if (aBreakTests
[i
] == 0x200B)
283 OUString aTest
= "Word" + OUStringChar(aBreakTests
[i
]) + "Word";
284 aBounds
= m_xBreak
->getWordBoundary(aTest
, 0, aLocale
, mode
, true);
// NOTE(review): the case labels below presumably belong to a switch on mode
// whose header is not visible here; every case asserts the same bounds.
287 case i18n::WordType::ANY_WORD
:
288 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 4);
290 case i18n::WordType::ANYWORD_IGNOREWHITESPACES
:
291 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 4);
293 case i18n::WordType::DICTIONARY_WORD
:
294 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 4);
296 case i18n::WordType::WORD_COUNT
:
297 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 4);
301 CPPUNIT_ASSERT(m_xBreak
->isBeginWord(aTest
, aBounds
.startPos
, aLocale
, mode
));
302 CPPUNIT_ASSERT(m_xBreak
->isEndWord(aTest
, aBounds
.endPos
, aLocale
, mode
));
// Characters expected to join: placed between two copies of Word, the boundary
// must span all 9 code units for every word type.
306 sal_Unicode aJoinTests
[] = { 'X', 0x200C, 0x200D, 0x2060, 0xFEFF, 0xFFF9, 0xFFFA, 0xFFFB };
307 for (int mode
= i18n::WordType::ANY_WORD
; mode
<= i18n::WordType::WORD_COUNT
; ++mode
)
309 //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
310 for (size_t i
= 0; i
< SAL_N_ELEMENTS(aJoinTests
); ++i
)
312 OUString aTest
= "Word" + OUStringChar(aJoinTests
[i
]) + "Word";
313 aBounds
= m_xBreak
->getWordBoundary(aTest
, 0, aLocale
, mode
, true);
316 case i18n::WordType::ANY_WORD
:
317 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 9);
319 case i18n::WordType::ANYWORD_IGNOREWHITESPACES
:
320 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 9);
322 case i18n::WordType::DICTIONARY_WORD
:
323 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 9);
325 case i18n::WordType::WORD_COUNT
:
326 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 9);
330 CPPUNIT_ASSERT(m_xBreak
->isBeginWord(aTest
, aBounds
.startPos
, aLocale
, mode
));
331 CPPUNIT_ASSERT(m_xBreak
->isEndWord(aTest
, aBounds
.endPos
, aLocale
, mode
));
335 //See https://bz.apache.org/ooo/show_bug.cgi?id=13494
// Template string: each x is substituted with a punctuation character from aTests.
337 const OUString
aBase("xxAAxxBBxxCCxx");
338 const sal_Unicode aTests
[] =
340 '\'', ';', ',', '.', '!', '@', '#', '%', '&', '*',
341 '(', ')', '_', '-', '{', '}', '[', ']', '\"', '/',
342 '\\', '?', '~', '$', '+', '^', '=', '<', '>', '|'
// Expected word starts when every single x is replaced (punctuation doubled).
345 const sal_Int32 aDoublePositions
[] = {0, 2, 4, 6, 8, 10, 12, 14};
346 for (size_t j
= 0; j
< SAL_N_ELEMENTS(aTests
); ++j
)
348 OUString aTest
= aBase
.replace('x', aTests
[j
]);
// Forward pass: successive nextWord start positions are compared with the table.
353 CPPUNIT_ASSERT(i
< SAL_N_ELEMENTS(aDoublePositions
));
354 nPos
= m_xBreak
->nextWord(aTest
, nPos
, aLocale
, i18n::WordType::ANYWORD_IGNOREWHITESPACES
).startPos
;
355 CPPUNIT_ASSERT_EQUAL(aDoublePositions
[i
], nPos
);
358 while (nPos
< aTest
.getLength());
// Backward pass: restart from the end of the string and the last table entry.
359 nPos
= aTest
.getLength();
360 i
= SAL_N_ELEMENTS(aDoublePositions
)-1;
363 nPos
= m_xBreak
->previousWord(aTest
, nPos
, aLocale
, i18n::WordType::ANYWORD_IGNOREWHITESPACES
).startPos
;
365 CPPUNIT_ASSERT_EQUAL(aDoublePositions
[i
], nPos
);
// Expected word starts when each xx pair is replaced by one punctuation
// character. The loop starts at j = 1; aTests[0] has its own table further down.
370 const sal_Int32 aSinglePositions
[] = {0, 1, 3, 4, 6, 7, 9, 10};
371 for (size_t j
= 1; j
< SAL_N_ELEMENTS(aTests
); ++j
)
373 OUString aTest
= aBase
.replaceAll("xx", OUStringChar(aTests
[j
]));
378 CPPUNIT_ASSERT(i
< SAL_N_ELEMENTS(aSinglePositions
));
379 nPos
= m_xBreak
->nextWord(aTest
, nPos
, aLocale
, i18n::WordType::ANYWORD_IGNOREWHITESPACES
).startPos
;
380 CPPUNIT_ASSERT_EQUAL(aSinglePositions
[i
], nPos
);
383 while (nPos
< aTest
.getLength());
384 nPos
= aTest
.getLength();
385 i
= SAL_N_ELEMENTS(aSinglePositions
)-1;
388 nPos
= m_xBreak
->previousWord(aTest
, nPos
, aLocale
, i18n::WordType::ANYWORD_IGNOREWHITESPACES
).startPos
;
390 CPPUNIT_ASSERT_EQUAL(aSinglePositions
[i
], nPos
);
// Expected word starts for aTests[0]; the assertion below first verifies which
// character aTests[0] is, so this table stays tied to that entry.
395 const sal_Int32 aSingleQuotePositions
[] = {0, 1, 9, 10};
396 CPPUNIT_ASSERT_EQUAL(u
'\'', aTests
[0]);
398 OUString aTest
= aBase
.replaceAll("xx", OUStringChar(aTests
[0]));
403 CPPUNIT_ASSERT(i
< SAL_N_ELEMENTS(aSingleQuotePositions
));
404 nPos
= m_xBreak
->nextWord(aTest
, nPos
, aLocale
, i18n::WordType::ANYWORD_IGNOREWHITESPACES
).startPos
;
405 CPPUNIT_ASSERT_EQUAL(aSingleQuotePositions
[i
], nPos
);
408 while (nPos
< aTest
.getLength());
409 nPos
= aTest
.getLength();
410 i
= SAL_N_ELEMENTS(aSingleQuotePositions
)-1;
413 nPos
= m_xBreak
->previousWord(aTest
, nPos
, aLocale
, i18n::WordType::ANYWORD_IGNOREWHITESPACES
).startPos
;
415 CPPUNIT_ASSERT_EQUAL(aSingleQuotePositions
[i
], nPos
);
421 //See https://bz.apache.org/ooo/show_bug.cgi?id=13451
423 aLocale
.Language
= "ca";
424 aLocale
.Country
= "ES";
426 OUString
aTest("mirar-se comprar-vos donem-nos les mans aneu-vos-en!");
// Expected endPos values of successive DICTIONARY_WORD boundaries.
429 sal_Int32 aExpected
[] = {8, 20, 30, 34, 39, 51, 52};
433 CPPUNIT_ASSERT(i
< SAL_N_ELEMENTS(aExpected
));
434 nPos
= m_xBreak
->getWordBoundary(aTest
, nPos
, aLocale
,
435 i18n::WordType::DICTIONARY_WORD
, true).endPos
;
436 CPPUNIT_ASSERT_EQUAL(aExpected
[i
], nPos
);
439 while (nPos
++ < aTest
.getLength());
// Every expected entry must have been consumed.
440 CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected
), i
);
443 //See https://bz.apache.org/ooo/show_bug.cgi?id=85411
// Three iterations, one locale each (en-US, ca-ES, fi-FI).
// NOTE(review): the selection logic is not visible; CPPUNIT_ASSERT(false)
// presumably guards an unexpected j - confirm.
444 for (int j
= 0; j
< 3; ++j
)
449 aLocale
.Language
= "en";
450 aLocale
.Country
= "US";
453 aLocale
.Language
= "ca";
454 aLocale
.Country
= "ES";
457 aLocale
.Language
= "fi";
458 aLocale
.Country
= "FI";
461 CPPUNIT_ASSERT(false);
// Words separated by U+200B instead of spaces.
465 const sal_Unicode TEST
[] =
467 'I', 0x200B, 'w', 'a', 'n', 't', 0x200B, 't', 'o', 0x200B, 'g', 'o'
469 OUString
aTest(TEST
, SAL_N_ELEMENTS(TEST
));
472 sal_Int32 aExpected
[] = {1, 6, 9, 12};
476 CPPUNIT_ASSERT(i
< SAL_N_ELEMENTS(aExpected
));
477 nPos
= m_xBreak
->getWordBoundary(aTest
, nPos
, aLocale
,
478 i18n::WordType::DICTIONARY_WORD
, true).endPos
;
479 CPPUNIT_ASSERT_EQUAL(aExpected
[i
], nPos
);
482 while (nPos
++ < aTest
.getLength());
483 CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected
), i
);
486 //https://bz.apache.org/ooo/show_bug.cgi?id=21290
// Two iterations, one locale each (en-US, then grc with an empty country).
487 for (int j
= 0; j
< 2; ++j
)
492 aLocale
.Language
= "en";
493 aLocale
.Country
= "US";
496 aLocale
.Language
= "grc";
497 aLocale
.Country
.clear();
500 CPPUNIT_ASSERT(false);
// Greek and Greek Extended code points forming four words separated by U+0020.
504 const sal_Unicode TEST
[] =
506 0x1F0C, 0x03BD, 0x03B4, 0x03C1, 0x03B1, 0x0020, 0x1F00,
507 0x03C1, 0x03BD, 0x1F7B, 0x03BC, 0x03B5, 0x03BD, 0x03BF,
508 0x03C2, 0x0020, 0x1F00, 0x03BB, 0x03BB, 0x0020, 0x1F24,
509 0x03C3, 0x03B8, 0x03B9, 0x03BF, 0x03BD
511 OUString
aTest(TEST
, SAL_N_ELEMENTS(TEST
));
514 sal_Int32 aExpected
[] = {5, 15, 19, 26};
518 CPPUNIT_ASSERT(i
< SAL_N_ELEMENTS(aExpected
));
519 nPos
= m_xBreak
->getWordBoundary(aTest
, nPos
, aLocale
,
520 i18n::WordType::DICTIONARY_WORD
, true).endPos
;
521 CPPUNIT_ASSERT_EQUAL(aExpected
[i
], nPos
);
524 while (nPos
++ < aTest
.getLength());
525 CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected
), i
);
528 //See https://bz.apache.org/ooo/show_bug.cgi?id=58513
529 //See https://bugs.libreoffice.org/show_bug.cgi?id=55707
531 aLocale
.Language
= "fi";
532 aLocale
.Country
= "FI";
534 OUString
aTest("Kuorma-auto kaakkois- ja Keski-Suomi USA:n 90:n %:n");
// First pass: expected endPos values using WordType::WORD_COUNT.
538 sal_Int32 aExpected
[] = {11, 21, 24, 36, 42, 47, 51};
542 CPPUNIT_ASSERT(i
< SAL_N_ELEMENTS(aExpected
));
543 nPos
= m_xBreak
->getWordBoundary(aTest
, nPos
, aLocale
,
544 i18n::WordType::WORD_COUNT
, true).endPos
;
545 CPPUNIT_ASSERT_EQUAL(aExpected
[i
], nPos
);
548 while (nPos
++ < aTest
.getLength());
549 CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected
), i
);
// Second pass: DICTIONARY_WORD, checking both startPos and endPos against the
// table. NOTE(review): i is presumably advanced between the two assertions on
// lines not visible here - confirm.
554 sal_Int32 aExpected
[] = {0, 11, 12, 20, 22, 24, 25, 36, 37,
555 40, 41, 42, 43, 45, 46, 47, 50, 51};
559 CPPUNIT_ASSERT(i
< SAL_N_ELEMENTS(aExpected
));
560 aBounds
= m_xBreak
->getWordBoundary(aTest
, nPos
, aLocale
,
561 i18n::WordType::DICTIONARY_WORD
, true);
562 CPPUNIT_ASSERT_EQUAL(aExpected
[i
], aBounds
.startPos
);
564 CPPUNIT_ASSERT_EQUAL(aExpected
[i
], aBounds
.endPos
);
566 nPos
= aBounds
.endPos
;
568 while (nPos
++ < aTest
.getLength());
569 CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected
), i
);
573 //See https://bz.apache.org/ooo/show_bug.cgi?id=107843
575 aLocale
.Language
= "en";
576 aLocale
.Country
= "US";
// Two words that each contain a ligature code point (U+FB00, U+FB01); the
// ligature must stay inside its word.
578 const sal_Unicode TEST
[] =
580 'r', 'u', 0xFB00, 'l', 'e', ' ', 0xFB01, 's', 'h'
582 OUString
aTest(TEST
, SAL_N_ELEMENTS(TEST
));
584 aBounds
= m_xBreak
->getWordBoundary(aTest
, 1, aLocale
, i18n::WordType::DICTIONARY_WORD
, false);
585 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 5);
587 aBounds
= m_xBreak
->getWordBoundary(aTest
, 7, aLocale
, i18n::WordType::DICTIONARY_WORD
, false);
588 CPPUNIT_ASSERT(aBounds
.startPos
== 6 && aBounds
.endPos
== 9);
591 //See https://bz.apache.org/ooo/show_bug.cgi?id=113785
593 aLocale
.Language
= "en";
594 aLocale
.Country
= "US";
// Single letters separated by U+2013 and U+2014; each letter is its own word.
596 const sal_Unicode TEST
[] =
598 'a', 0x2013, 'b', 0x2014, 'c'
600 OUString
aTest(TEST
, SAL_N_ELEMENTS(TEST
));
602 aBounds
= m_xBreak
->getWordBoundary(aTest
, 0, aLocale
, i18n::WordType::DICTIONARY_WORD
, true);
603 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 1);
605 aBounds
= m_xBreak
->nextWord(aTest
, 0, aLocale
, i18n::WordType::DICTIONARY_WORD
);
606 CPPUNIT_ASSERT(aBounds
.startPos
== 2 && aBounds
.endPos
== 3);
608 aBounds
= m_xBreak
->nextWord(aTest
, aBounds
.endPos
, aLocale
, i18n::WordType::DICTIONARY_WORD
);
609 CPPUNIT_ASSERT(aBounds
.startPos
== 4 && aBounds
.endPos
== 5);
613 //See https://bugs.libreoffice.org/show_bug.cgi?id=40292
614 //See https://bz.apache.org/ooo/show_bug.cgi?id=80412
615 //See https://bz.apache.org/ooo/show_bug.cgi?id=111152
616 //See https://bz.apache.org/ooo/show_bug.cgi?id=50172
// Checks that nextCharacters and previousCharacters in SKIPCELL mode step over
// whole clusters rather than single code units, using Bengali (bn-IN),
// Tamil (ta-IN), Hebrew (default locale) and Hindi (hi-IN) inputs.
// NOTE(review): braces and the declarations of nPos and nDone are not visible
// in this listing - confirm against the complete file.
617 void TestBreakIterator::testGraphemeIteration()
619 lang::Locale aLocale
;
620 aLocale
.Language
= "bn";
621 aLocale
.Country
= "IN";
// Three code units that must be traversed in one step in either direction.
624 const sal_Unicode BA_HALANT_LA
[] = { 0x09AC, 0x09CD, 0x09AF };
625 OUString
aTest(BA_HALANT_LA
, SAL_N_ELEMENTS(BA_HALANT_LA
));
629 nPos
= m_xBreak
->nextCharacters(aTest
, 0, aLocale
,
630 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
631 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32
>(SAL_N_ELEMENTS(BA_HALANT_LA
)), nPos
);
632 nPos
= m_xBreak
->previousCharacters(aTest
, SAL_N_ELEMENTS(BA_HALANT_LA
), aLocale
,
633 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
634 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32
>(0), nPos
);
// Four code units, again expected to form a single cell.
638 const sal_Unicode HA_HALANT_NA_VOWELSIGNI
[] = { 0x09B9, 0x09CD, 0x09A3, 0x09BF };
639 OUString
aTest(HA_HALANT_NA_VOWELSIGNI
, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI
));
643 nPos
= m_xBreak
->nextCharacters(aTest
, 0, aLocale
,
644 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
645 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32
>(SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI
)), nPos
);
646 nPos
= m_xBreak
->previousCharacters(aTest
, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI
), aLocale
,
647 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
648 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32
>(0), nPos
);
// Five code units with two U+09CD joins, expected to form a single cell.
652 const sal_Unicode TA_HALANT_MA_HALANT_YA
[] = { 0x09A4, 0x09CD, 0x09AE, 0x09CD, 0x09AF };
653 OUString
aTest(TA_HALANT_MA_HALANT_YA
, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA
));
657 nPos
= m_xBreak
->nextCharacters(aTest
, 0, aLocale
,
658 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
659 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32
>(SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA
)), nPos
);
660 nPos
= m_xBreak
->previousCharacters(aTest
, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA
), aLocale
,
661 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
662 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32
>(0), nPos
);
// Switch to the Tamil locale for the next inputs.
665 aLocale
.Language
= "ta";
666 aLocale
.Country
= "IN";
669 const sal_Unicode KA_VIRAMA_SSA
[] = { 0x0B95, 0x0BCD, 0x0BB7 };
670 OUString
aTest(KA_VIRAMA_SSA
, SAL_N_ELEMENTS(KA_VIRAMA_SSA
));
675 nPos
= m_xBreak
->nextCharacters(aTest
, 0, aLocale
,
676 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
677 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32
>(SAL_N_ELEMENTS(KA_VIRAMA_SSA
)), nPos
);
678 nPos
= m_xBreak
->previousCharacters(aTest
, SAL_N_ELEMENTS(KA_VIRAMA_SSA
), aLocale
,
679 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
680 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32
>(0), nPos
);
684 const sal_Unicode KA_VOWELSIGNU
[] = { 0x0B95, 0x0BC1 };
685 OUString
aTest(KA_VOWELSIGNU
, SAL_N_ELEMENTS(KA_VOWELSIGNU
));
690 nPos
= m_xBreak
->nextCharacters(aTest
, 0, aLocale
,
691 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
692 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32
>(SAL_N_ELEMENTS(KA_VOWELSIGNU
)), nPos
);
693 nPos
= m_xBreak
->previousCharacters(aTest
, SAL_N_ELEMENTS(KA_VOWELSIGNU
), aLocale
,
694 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
695 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32
>(0), nPos
);
// Eight code units expected to split into four cells of two units each; the
// loops below walk forward four steps and then backward four steps.
699 const sal_Unicode CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI
[] =
700 { 0x0B9A, 0x0BBF, 0x0BA4, 0x0BCD, 0x0BA4, 0x0BBF, 0x0BB0, 0x0BC8 };
701 OUString
aTest(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI
,
702 SAL_N_ELEMENTS(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI
));
707 for (sal_Int32 i
= 0; i
< 4; ++i
)
709 sal_Int32 nOldPos
= nPos
;
710 nPos
= m_xBreak
->nextCharacters(aTest
, nPos
, aLocale
,
711 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
712 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip 2 units", nOldPos
+2, nPos
);
715 for (sal_Int32 i
= 0; i
< 4; ++i
)
717 sal_Int32 nOldPos
= nPos
;
718 nPos
= m_xBreak
->previousCharacters(aTest
, nPos
, aLocale
,
719 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
720 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip 2 units", nOldPos
-2, nPos
);
// Hebrew base letter plus point, iterated with a default-constructed locale.
// NOTE(review): nGraphemeCount is presumably incremented inside the loop on a
// line not visible here - confirm.
725 const sal_Unicode ALEF_QAMATS
[] = { 0x05D0, 0x05B8 };
726 OUString
aText(ALEF_QAMATS
, SAL_N_ELEMENTS(ALEF_QAMATS
));
728 sal_Int32 nGraphemeCount
= 0;
730 sal_Int32 nCurPos
= 0;
731 while (nCurPos
< aText
.getLength())
733 sal_Int32 nCount2
= 1;
// nCount2 is passed both as the requested count and as the done-count argument.
734 nCurPos
= m_xBreak
->nextCharacters(aText
, nCurPos
, lang::Locale(),
735 i18n::CharacterIteratorMode::SKIPCELL
, nCount2
, nCount2
);
739 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be considered 1 grapheme", static_cast<sal_Int32
>(1), nGraphemeCount
);
// Switch to the Hindi locale for the final input.
742 aLocale
.Language
= "hi";
743 aLocale
.Country
= "IN";
746 const sal_Unicode SHA_VOWELSIGNII
[] = { 0x936, 0x940 };
747 OUString
aTest(SHA_VOWELSIGNII
, SAL_N_ELEMENTS(SHA_VOWELSIGNII
));
752 nPos
= m_xBreak
->nextCharacters(aTest
, 0, aLocale
,
753 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
754 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32
>(SAL_N_ELEMENTS(SHA_VOWELSIGNII
)), nPos
);
755 nPos
= m_xBreak
->previousCharacters(aTest
, SAL_N_ELEMENTS(SHA_VOWELSIGNII
), aLocale
,
756 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
757 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32
>(0), nPos
);
761 //A test to ensure that certain ranges and codepoints that are categorized as
762 //weak remain as weak, so that existing docs that depend on this don't silently
763 //change font for those weak chars
// Asserts that getScriptType reports ScriptType::WEAK for every code unit
// listed in WEAKS. Only the listed values (the first and last value of each
// commented range) are checked, not the code points in between.
// NOTE(review): aLocale is set up but not used by any line visible here.
764 void TestBreakIterator::testWeak()
766 lang::Locale aLocale
;
767 aLocale
.Language
= "en";
768 aLocale
.Country
= "US";
771 const sal_Unicode WEAKS
[] =
775 0x0300, 0x036F, //Combining Diacritical Marks
776 0x1AB0, 0x1AFF, //Combining Diacritical Marks Extended
777 0x1DC0, 0x1DFF, //Combining Diacritical Marks Supplement
778 0x20D0, 0x20FF, //Combining Diacritical Marks for Symbols
779 0x2150, 0x215F, //Number Forms, fractions
780 0x2160, 0x2180, //Number Forms, roman numerals
781 0x2200, 0x22FF, //Mathematical Operators
782 0x27C0, 0x27EF, //Miscellaneous Mathematical Symbols-A
783 0x2980, 0x29FF, //Miscellaneous Mathematical Symbols-B
784 0x2A00, 0x2AFF, //Supplemental Mathematical Operators
785 0x2100, 0x214F, //Letterlike Symbols
786 0x2308, 0x230B, //Miscellaneous technical
787 0x25A0, 0x25FF, //Geometric Shapes
788 0x2B30, 0x2B4C //Miscellaneous Symbols and Arrows
790 OUString
aWeaks(WEAKS
, SAL_N_ELEMENTS(WEAKS
));
792 for (sal_Int32 i
= 0; i
< aWeaks
.getLength(); ++i
)
794 sal_Int16 nScript
= m_xBreak
->getScriptType(aWeaks
, i
);
// The failure message carries the offending code unit in hexadecimal.
797 OString::number(static_cast<sal_Int32
>(aWeaks
[i
]), 16) +
798 " should have been weak";
799 CPPUNIT_ASSERT_EQUAL_MESSAGE(aMsg
.getStr(),
800 i18n::ScriptType::WEAK
, nScript
);
805 //A test to ensure that certain ranges and codepoints that are categorized as
806 //asian remain as asian, so that existing docs that depend on this don't silently
807 //change font for those asian chars.
808 //See https://bugs.libreoffice.org/show_bug.cgi?id=38095
// Asserts that getScriptType reports ScriptType::ASIAN for every code unit in
// ASIANS.
// NOTE(review): the values of the ASIANS initializer are not visible in this
// listing (only its comments are), and aLocale is not used by any visible line.
809 void TestBreakIterator::testAsian()
811 lang::Locale aLocale
;
812 aLocale
.Language
= "en";
813 aLocale
.Country
= "US";
816 const sal_Unicode ASIANS
[] =
818 //some typical CJK chars
820 //The full HalfWidth and FullWidth block has historically been
821 //designated as taking the CJK font :-(
822 //HalfWidth and FullWidth forms of ASCII 0-9, categorized under
823 //UAX24 as "Common" i.e. by that logic WEAK
825 //HalfWidth and FullWidth forms of ASCII A-z, categorized under
826 //UAX24 as "Latin", i.e. by that logic LATIN
829 OUString
aAsians(ASIANS
, SAL_N_ELEMENTS(ASIANS
));
831 for (sal_Int32 i
= 0; i
< aAsians
.getLength(); ++i
)
833 sal_Int16 nScript
= m_xBreak
->getScriptType(aAsians
, i
);
// The failure message carries the offending code unit in hexadecimal.
836 OString::number(static_cast<sal_Int32
>(aAsians
[i
]), 16) +
837 " should have been asian";
838 CPPUNIT_ASSERT_EQUAL_MESSAGE(aMsg
.getStr(),
839 i18n::ScriptType::ASIAN
, nScript
);
844 #if (U_ICU_VERSION_MAJOR_NUM > 51)
845 //A test to ensure that our Lao word boundary detection is useful
// Checks DICTIONARY_WORD boundaries in a 12 code unit Lao string (locale
// lo-LA): the first word must span 0..5 and the word starting at its end
// must span 5..9.
846 void TestBreakIterator::testLao()
848 lang::Locale aLocale
;
849 aLocale
.Language
= "lo";
850 aLocale
.Country
= "LA";
852 const sal_Unicode LAO
[] = { 0x0e8d, 0x0eb4, 0x0e99, 0x0e94, 0x0eb5, 0x0e95, 0x0ec9, 0x0ead, 0x0e99, 0x0eae, 0x0eb1, 0x0e9a };
853 OUString
aTest(LAO
, SAL_N_ELEMENTS(LAO
));
854 i18n::Boundary aBounds
= m_xBreak
->getWordBoundary(aTest
, 0, aLocale
,
855 i18n::WordType::DICTIONARY_WORD
, true);
857 CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds
.startPos
);
858 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds
.endPos
);
// Continue from the end of the first word.
860 aBounds
= m_xBreak
->getWordBoundary(aTest
, aBounds
.endPos
, aLocale
,
861 i18n::WordType::DICTIONARY_WORD
, true);
863 CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds
.startPos
);
864 CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds
.endPos
);
868 //A test to ensure that our Thai word boundary detection is useful
// Thai checks (locale th-TH) in three parts: a six code unit string must be
// reported as one dictionary word; word starts found by nextWord must be
// revisited in reverse order by previousWord; and a UTF-16 surrogate pair must
// be skipped as a unit in both SKIPCELL and SKIPCHARACTER modes.
// NOTE(review): braces, the do/while headers and the declarations of nPos and
// nDone are not visible in this listing - confirm against the complete file.
869 void TestBreakIterator::testThai()
871 lang::Locale aLocale
;
872 aLocale
.Language
= "th";
873 aLocale
.Country
= "TH";
875 //See http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html
877 const sal_Unicode THAI
[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
878 OUString
aTest(THAI
, SAL_N_ELEMENTS(THAI
));
879 i18n::Boundary aBounds
= m_xBreak
->getWordBoundary(aTest
, 0, aLocale
,
880 i18n::WordType::DICTIONARY_WORD
, true);
881 CPPUNIT_ASSERT_MESSAGE("Should skip full word",
882 aBounds
.startPos
== 0 && aBounds
.endPos
== aTest
.getLength());
885 //See https://bz.apache.org/ooo/show_bug.cgi?id=29548
886 //make sure forwards and back are consistent
// The same 21 code unit sequence appears twice in this array.
888 const sal_Unicode THAI
[] =
890 0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41,
891 0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34,
892 0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27,
893 0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41,
894 0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34,
895 0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27
897 OUString
aTest(THAI
, SAL_N_ELEMENTS(THAI
));
// Forward pass: remember every word start on a stack.
899 std::stack
<sal_Int32
> aPositions
;
903 nPos
= m_xBreak
->nextWord(aTest
, nPos
, aLocale
, i18n::WordType::ANYWORD_IGNOREWHITESPACES
).startPos
;
904 aPositions
.push(nPos
);
906 while (nPos
< aTest
.getLength());
// Backward pass: each previousWord start must equal the top of the stack.
// NOTE(review): the pop calls are presumably on lines not visible here.
907 nPos
= aTest
.getLength();
908 CPPUNIT_ASSERT(!aPositions
.empty());
912 CPPUNIT_ASSERT(!aPositions
.empty());
913 nPos
= m_xBreak
->previousWord(aTest
, nPos
, aLocale
, i18n::WordType::ANYWORD_IGNOREWHITESPACES
).startPos
;
914 CPPUNIT_ASSERT_EQUAL(aPositions
.top(), nPos
);
// One supplementary-plane character encoded as a UTF-16 surrogate pair.
922 const sal_Unicode NON_BMP
[] = { 0xD800, 0xDC00 };
923 OUString
aTest(NON_BMP
, SAL_N_ELEMENTS(NON_BMP
));
928 nPos
= m_xBreak
->nextCharacters(aTest
, 0, aLocale
,
929 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
930 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast<sal_Int32
>(SAL_N_ELEMENTS(NON_BMP
)), nPos
);
931 nPos
= m_xBreak
->previousCharacters(aTest
, SAL_N_ELEMENTS(NON_BMP
), aLocale
,
932 i18n::CharacterIteratorMode::SKIPCELL
, 1, nDone
);
933 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast<sal_Int32
>(0), nPos
);
// Same checks in SKIPCHARACTER mode.
935 nPos
= m_xBreak
->nextCharacters(aTest
, 0, aLocale
,
936 i18n::CharacterIteratorMode::SKIPCHARACTER
, 1, nDone
);
937 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast<sal_Int32
>(SAL_N_ELEMENTS(NON_BMP
)), nPos
);
938 nPos
= m_xBreak
->previousCharacters(aTest
, SAL_N_ELEMENTS(NON_BMP
), aLocale
,
939 i18n::CharacterIteratorMode::SKIPCHARACTER
, 1, nDone
);
940 CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast<sal_Int32
>(0), nPos
);
// Checks that, for locale nod-TH, the seven code unit string below is reported
// as a single dictionary word spanning the whole string.
945 void TestBreakIterator::testNorthernThai()
947 lang::Locale aLocale
;
948 aLocale
.Language
= "nod";
949 aLocale
.Country
= "TH";
951 const sal_Unicode NORTHERN_THAI1
[] = { 0x0E01, 0x0E38, 0x0E4A, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
952 OUString
aTest(NORTHERN_THAI1
, SAL_N_ELEMENTS(NORTHERN_THAI1
));
953 i18n::Boundary aBounds
= m_xBreak
->getWordBoundary(aTest
, 0, aLocale
,
954 i18n::WordType::DICTIONARY_WORD
, true);
955 CPPUNIT_ASSERT_MESSAGE("Should skip full word",
956 aBounds
.startPos
== 0 && aBounds
.endPos
== aTest
.getLength());
959 // Not sure if any version earlier than 49 did have Khmer word boundary
960 // dictionaries, 4.6 does not.
962 // As of icu 54, word boundary detection for Khmer is still considered
963 // insufficient, so icu khmer stuff is disabled
965 //A test to ensure that our Khmer word boundary detection is useful
966 //https://bugs.libreoffice.org/show_bug.cgi?id=52020
// Checks DICTIONARY_WORD boundaries in a five code unit Khmer string (locale
// km-KH): the first word must span 0..3 and the word starting at its end
// must span 3..5.
967 void TestBreakIterator::testKhmer()
969 lang::Locale aLocale
;
970 aLocale
.Language
= "km";
971 aLocale
.Country
= "KH";
973 const sal_Unicode KHMER
[] = { 0x17B2, 0x17D2, 0x1799, 0x1782, 0x17C1 };
975 OUString
aTest(KHMER
, SAL_N_ELEMENTS(KHMER
));
976 i18n::Boundary aBounds
= m_xBreak
->getWordBoundary(aTest
, 0, aLocale
,
977 i18n::WordType::DICTIONARY_WORD
, true);
979 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 3);
// Continue from the end of the first word.
981 aBounds
= m_xBreak
->getWordBoundary(aTest
, aBounds
.endPos
, aLocale
,
982 i18n::WordType::DICTIONARY_WORD
, true);
984 CPPUNIT_ASSERT(aBounds
.startPos
== 3 && aBounds
.endPos
== 5);
// Runs the Japanese (ja-JP) word-boundary checks against the iterator passed
// in as xBreak, so the same checks can be applied to more than one instance.
// First input: seven code units expected to form one word (0..7).
// Second input: a three code unit sequence repeated twice, expected to split
// into two words (0..3 and 3..6).
988 void TestBreakIterator::doTestJapanese(uno::Reference
< i18n::XBreakIterator
> const &xBreak
)
990 lang::Locale aLocale
;
991 aLocale
.Language
= "ja";
992 aLocale
.Country
= "JP";
993 i18n::Boundary aBounds
;
996 const sal_Unicode JAPANESE
[] = { 0x30B7, 0x30E3, 0x30C3, 0x30C8, 0x30C0, 0x30A6, 0x30F3 };
998 OUString
aTest(JAPANESE
, SAL_N_ELEMENTS(JAPANESE
));
999 aBounds
= xBreak
->getWordBoundary(aTest
, 5, aLocale
,
1000 i18n::WordType::DICTIONARY_WORD
, true);
1002 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 7);
1006 const sal_Unicode JAPANESE
[] = { 0x9EBB, 0x306E, 0x8449, 0x9EBB, 0x306E, 0x8449 };
1008 OUString
aTest(JAPANESE
, SAL_N_ELEMENTS(JAPANESE
));
// Query from inside the first repetition, then from inside the second.
1009 aBounds
= xBreak
->getWordBoundary(aTest
, 1, aLocale
,
1010 i18n::WordType::DICTIONARY_WORD
, true);
1012 CPPUNIT_ASSERT(aBounds
.startPos
== 0 && aBounds
.endPos
== 3);
1014 aBounds
= xBreak
->getWordBoundary(aTest
, 5, aLocale
,
1015 i18n::WordType::DICTIONARY_WORD
, true);
1017 CPPUNIT_ASSERT(aBounds
.startPos
== 3 && aBounds
.endPos
== 6);
// Runs doTestJapanese twice: once on the fixture iterator (m_xBreak) and once
// on a second BreakIterator instance created from the service factory.
1021 void TestBreakIterator::testJapanese()
1023 doTestJapanese(m_xBreak
);
1025 // fdo#78479 - test second / cached instantiation of xdictionary
// UNO_QUERY_THROW makes the interface query throw instead of yielding an
// empty reference.
1026 uno::Reference
< i18n::XBreakIterator
> xTmpBreak(m_xSFactory
->createInstance(
1027 "com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW
);
1029 doTestJapanese(xTmpBreak
);
// Checks a Chinese (zh-CN) string that ends with a UTF-16 surrogate pair: the
// dictionary word found from position 4 must span exactly the pair (4..6).
1032 void TestBreakIterator::testChinese()
1034 lang::Locale aLocale
;
1035 aLocale
.Language
= "zh";
1036 aLocale
.Country
= "CN";
1037 i18n::Boundary aBounds
;
// Four BMP code units followed by the surrogate pair 0xD867 0xDEDB.
1040 const sal_Unicode CHINESE
[] = { 0x6A35, 0x6A30, 0x69FE, 0x8919, 0xD867, 0xDEDB };
1042 OUString
aTest(CHINESE
, SAL_N_ELEMENTS(CHINESE
));
1043 aBounds
= m_xBreak
->getWordBoundary(aTest
, 4, aLocale
,
1044 i18n::WordType::DICTIONARY_WORD
, true);
1045 CPPUNIT_ASSERT(aBounds
.startPos
== 4 && aBounds
.endPos
== 6);
// Per-test setup: runs the base fixture setup, then creates the
// com.sun.star.i18n.BreakIterator service and stores it in m_xBreak.
// UNO_QUERY_THROW makes the interface query throw instead of yielding an
// empty reference.
1048 void TestBreakIterator::setUp()
1050 BootstrapFixtureBase::setUp();
1051 m_xBreak
.set(m_xSFactory
->createInstance("com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW
);
// Per-test teardown: delegates to the base fixture teardown.
// NOTE(review): lines between the signature and this call are not visible in
// this listing - confirm whether m_xBreak is released here.
1054 void TestBreakIterator::tearDown()
1057 BootstrapFixtureBase::tearDown();
// Register the fixture suite with CppUnit and emit the test plugin boilerplate.
1060 CPPUNIT_TEST_SUITE_REGISTRATION(TestBreakIterator
);
1062 CPPUNIT_PLUGIN_IMPLEMENT();
1064 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */