tdf#130857 qt weld: Implement QtInstanceWidget::strip_mnemonic
[LibreOffice.git] / i18npool / source / breakiterator / breakiteratorImpl.cxx
blob6b6870f83ecda7756ee71789dd88a7ad3238349c
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <config_locales.h>
22 #include <breakiteratorImpl.hxx>
23 #include <cppuhelper/supportsservice.hxx>
24 #include <unicode/uchar.h>
25 #include <i18nutil/unicode.hxx>
26 #include <o3tl/string_view.hxx>
28 #include <com/sun/star/i18n/CharType.hpp>
29 #include <com/sun/star/i18n/ScriptType.hpp>
30 #include <com/sun/star/i18n/WordType.hpp>
31 #include <com/sun/star/uno/XComponentContext.hpp>
33 using namespace ::com::sun::star;
34 using namespace ::com::sun::star::uno;
35 using namespace ::com::sun::star::i18n;
36 using namespace ::com::sun::star::lang;
38 namespace i18npool {
40 BreakIteratorImpl::BreakIteratorImpl( const Reference < XComponentContext >& rxContext ) : m_xContext( rxContext )
44 BreakIteratorImpl::BreakIteratorImpl()
48 BreakIteratorImpl::~BreakIteratorImpl()
52 #define LBI getLocaleSpecificBreakIterator(rLocale)
54 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharacters( const OUString& Text, sal_Int32 nStartPos,
55 const Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
57 if (nCount < 0)
58 throw RuntimeException("BreakIteratorImpl::nextCharacters: expected nCount >=0, got "
59 + OUString::number(nCount));
61 return LBI->nextCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
64 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharacters( const OUString& Text, sal_Int32 nStartPos,
65 const Locale& rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
67 if (nCount < 0)
68 throw RuntimeException("BreakIteratorImpl::previousCharacters: expected nCount >=0, got "
69 + OUString::number(nCount));
71 return LBI->previousCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
74 #define isZWSP(c) (ch == 0x200B)
76 static sal_Int32 skipSpace(std::u16string_view Text, sal_Int32 nPos, sal_Int32 len, sal_Int16 rWordType, bool bDirection)
78 sal_uInt32 ch=0;
79 sal_Int32 pos=nPos;
80 switch (rWordType) {
81 case WordType::ANYWORD_IGNOREWHITESPACES:
82 case WordType::WORD_COUNT:
83 if (bDirection)
84 while (nPos < len)
86 ch = o3tl::iterateCodePoints(Text, &pos);
87 if (!u_isUWhiteSpace(ch) && !isZWSP(ch))
88 break;
89 nPos = pos;
91 else
92 while (nPos > 0)
94 ch = o3tl::iterateCodePoints(Text, &pos, -1);
95 if (!u_isUWhiteSpace(ch) && !isZWSP(ch))
96 break;
97 nPos = pos;
99 break;
100 case WordType::DICTIONARY_WORD:
101 if (bDirection)
102 while (nPos < len)
104 ch = o3tl::iterateCodePoints(Text, &pos);
105 if (!u_isWhitespace(ch) && !isZWSP(ch) && (ch == 0x002E || u_isalnum(ch)))
106 break;
107 nPos = pos;
109 else
110 while (nPos > 0)
112 ch = o3tl::iterateCodePoints(Text, &pos, -1);
113 if (!u_isWhitespace(ch) && !isZWSP(ch) && (ch == 0x002E || u_isalnum(ch)))
114 break;
115 nPos = pos;
117 break;
119 return nPos;
122 Boundary SAL_CALL BreakIteratorImpl::nextWord( const OUString& Text, sal_Int32 nStartPos,
123 const Locale& rLocale, sal_Int16 rWordType )
125 sal_Int32 len = Text.getLength();
126 if( nStartPos < 0 || len == 0 )
127 result.endPos = result.startPos = 0;
128 else if (nStartPos >= len)
129 result.endPos = result.startPos = len;
130 else {
131 result = LBI->nextWord(Text, nStartPos, rLocale, rWordType);
133 nStartPos = skipSpace(Text, result.startPos, len, rWordType, true);
135 if ( nStartPos != result.startPos) {
136 if( nStartPos >= len )
137 result.startPos = result.endPos = len;
138 else {
139 result = LBI->getWordBoundary(Text, nStartPos, rLocale, rWordType, true);
140 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts
141 if (result.startPos < nStartPos) result.startPos = nStartPos;
145 return result;
148 static bool isCJK( const Locale& rLocale ) {
149 return rLocale.Language == "zh" || rLocale.Language == "ja" || rLocale.Language == "ko";
152 Boundary SAL_CALL BreakIteratorImpl::previousWord( const OUString& Text, sal_Int32 nStartPos,
153 const Locale& rLocale, sal_Int16 rWordType)
155 sal_Int32 len = Text.getLength();
156 if( nStartPos <= 0 || len == 0 ) {
157 result.endPos = result.startPos = 0;
158 return result;
159 } else if (nStartPos > len) {
160 result.endPos = result.startPos = len;
161 return result;
164 sal_Int32 nPos = skipSpace(Text, nStartPos, len, rWordType, false);
166 // if some spaces are skipped, and the script type is Asian with no CJK rLocale, we have to return
167 // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary.
168 result.startPos = nPos;
169 if (nPos != nStartPos && nPos > 0 && !isCJK(rLocale) && getScriptClass(Text.iterateCodePoints(&nPos, -1)) == ScriptType::ASIAN) {
170 result.endPos = -1;
171 return result;
174 return LBI->previousWord(Text, result.startPos, rLocale, rWordType);
178 Boundary SAL_CALL BreakIteratorImpl::getWordBoundary( const OUString& Text, sal_Int32 nPos, const Locale& rLocale,
179 sal_Int16 rWordType, sal_Bool bDirection )
181 sal_Int32 len = Text.getLength();
182 if( nPos < 0 || len == 0 )
183 result.endPos = result.startPos = 0;
184 else if (nPos > len)
185 result.endPos = result.startPos = len;
186 else {
187 sal_Int32 next, prev;
188 next = skipSpace(Text, nPos, len, rWordType, true);
189 prev = skipSpace(Text, nPos, len, rWordType, false);
190 if (prev == 0 && next == len) {
191 result.endPos = result.startPos = nPos;
192 } else if (prev == 0 && ! bDirection) {
193 result.endPos = result.startPos = 0;
194 } else if (next == len && bDirection) {
195 result.endPos = result.startPos = len;
196 } else {
197 if (next != prev) {
198 if (next == nPos && next != len)
199 bDirection = true;
200 else if (prev == nPos && prev != 0)
201 bDirection = false;
202 else
203 nPos = bDirection ? next : prev;
205 result = LBI->getWordBoundary(Text, nPos, rLocale, rWordType, bDirection);
208 return result;
211 sal_Bool SAL_CALL BreakIteratorImpl::isBeginWord( const OUString& Text, sal_Int32 nPos,
212 const Locale& rLocale, sal_Int16 rWordType )
214 sal_Int32 len = Text.getLength();
216 if (nPos < 0 || nPos >= len) return false;
218 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, true);
220 if (tmp != nPos) return false;
222 result = getWordBoundary(Text, nPos, rLocale, rWordType, true);
224 return result.startPos == nPos;
227 sal_Bool SAL_CALL BreakIteratorImpl::isEndWord( const OUString& Text, sal_Int32 nPos,
228 const Locale& rLocale, sal_Int16 rWordType )
230 sal_Int32 len = Text.getLength();
232 if (nPos <= 0 || nPos > len) return false;
234 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, false);
236 if (tmp != nPos) return false;
238 result = getWordBoundary(Text, nPos, rLocale, rWordType, false);
240 return result.endPos == nPos;
243 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
244 const Locale &rLocale )
246 if (nStartPos < 0 || nStartPos > Text.getLength())
247 return -1;
248 if (Text.isEmpty()) return 0;
249 return LBI->beginOfSentence(Text, nStartPos, rLocale);
252 sal_Int32 SAL_CALL BreakIteratorImpl::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
253 const Locale &rLocale )
255 if (nStartPos < 0 || nStartPos > Text.getLength())
256 return -1;
257 if (Text.isEmpty()) return 0;
258 return LBI->endOfSentence(Text, nStartPos, rLocale);
261 LineBreakResults SAL_CALL BreakIteratorImpl::getLineBreak( const OUString& Text, sal_Int32 nStartPos,
262 const Locale& rLocale, sal_Int32 nMinBreakPos, const LineBreakHyphenationOptions& hOptions,
263 const LineBreakUserOptions& bOptions )
265 return LBI->getLineBreak(Text, nStartPos, rLocale, nMinBreakPos, hOptions, bOptions);
268 sal_Int16 SAL_CALL BreakIteratorImpl::getScriptType( const OUString& Text, sal_Int32 nPos )
270 return (nPos < 0 || nPos >= Text.getLength()) ? ScriptType::WEAK :
271 getScriptClass(Text.iterateCodePoints(&nPos, 0));
275 /** Increments/decrements position first, then obtains character.
276 @return current position, may be -1 or text length if string was consumed.
278 static sal_Int32 iterateCodePoints(const OUString& Text, sal_Int32 &nStartPos, sal_Int32 inc, sal_uInt32& ch) {
279 sal_Int32 nLen = Text.getLength();
280 if (nStartPos + inc < 0 || nStartPos + inc >= nLen) {
281 ch = 0;
282 nStartPos = nStartPos + inc < 0 ? -1 : nLen;
283 } else {
284 ch = Text.iterateCodePoints(&nStartPos, inc);
285 // Fix for #i80436#.
286 // erAck: 2009-06-30T21:52+0200 This logic looks somewhat
287 // suspicious as if it cures a symptom... anyway, had to add
288 // nStartPos < Text.getLength() to silence the (correct) assertion
289 // in rtl_uString_iterateCodePoints() if Text was one character
290 // (codepoint) only, made up of a surrogate pair.
291 //if (inc > 0 && nStartPos < Text.getLength())
292 // ch = Text.iterateCodePoints(&nStartPos, 0);
293 // With surrogates, nStartPos may actually point behind string
294 // now, even if inc is only +1
295 if (inc > 0)
296 ch = (nStartPos < nLen ? Text.iterateCodePoints(&nStartPos, 0) : 0);
298 return nStartPos;
302 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfScript( const OUString& Text,
303 sal_Int32 nStartPos, sal_Int16 ScriptType )
305 if (nStartPos < 0 || nStartPos >= Text.getLength())
306 return -1;
308 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
309 return -1;
311 if (nStartPos == 0) return 0;
312 sal_uInt32 ch=0;
313 while (iterateCodePoints(Text, nStartPos, -1, ch) >= 0 && ScriptType == getScriptClass(ch)) {
314 if (nStartPos == 0) return 0;
317 return iterateCodePoints(Text, nStartPos, 1, ch);
320 sal_Int32 SAL_CALL BreakIteratorImpl::endOfScript( const OUString& Text,
321 sal_Int32 nStartPos, sal_Int16 ScriptType )
323 if (nStartPos < 0 || nStartPos >= Text.getLength())
324 return -1;
326 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
327 return -1;
329 sal_Int32 strLen = Text.getLength();
330 sal_uInt32 ch=0;
331 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) {
332 sal_Int16 currentCharScriptType = getScriptClass(ch);
333 if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK)
334 break;
336 return nStartPos;
339 sal_Int32 SAL_CALL BreakIteratorImpl::previousScript( const OUString& Text,
340 sal_Int32 nStartPos, sal_Int16 ScriptType )
342 if (nStartPos < 0)
343 return -1;
344 if (nStartPos > Text.getLength())
345 nStartPos = Text.getLength();
347 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
349 sal_uInt32 ch=0;
350 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
351 if (((numberOfChange % 2) == 0) != (ScriptType != getScriptClass(ch)))
352 numberOfChange--;
353 else if (nStartPos == 0) {
354 return -1;
357 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
360 sal_Int32 SAL_CALL BreakIteratorImpl::nextScript( const OUString& Text, sal_Int32 nStartPos,
361 sal_Int16 ScriptType )
364 if (nStartPos < 0)
365 nStartPos = 0;
366 sal_Int32 strLen = Text.getLength();
367 if (nStartPos >= strLen)
368 return -1;
370 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
372 sal_uInt32 ch=0;
373 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
374 sal_Int16 currentCharScriptType = getScriptClass(ch);
375 if ((numberOfChange == 1) ? (ScriptType == currentCharScriptType) :
376 (ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK))
377 numberOfChange--;
379 return numberOfChange == 0 ? nStartPos : -1;
382 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
383 const Locale& /*rLocale*/, sal_Int16 CharType )
385 if (CharType == CharType::ANY_CHAR) return 0;
386 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
387 if (CharType != static_cast<sal_Int16>(u_charType( Text.iterateCodePoints(&nStartPos, 0)))) return -1;
389 sal_Int32 nPos=nStartPos;
390 while(nStartPos > 0 && CharType == static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nPos, -1)))) { nStartPos=nPos; }
391 return nStartPos; // begin of char block is inclusive
394 sal_Int32 SAL_CALL BreakIteratorImpl::endOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
395 const Locale& /*rLocale*/, sal_Int16 CharType )
397 sal_Int32 strLen = Text.getLength();
399 if (CharType == CharType::ANY_CHAR) return strLen; // end of char block is exclusive
400 if (nStartPos < 0 || nStartPos >= strLen) return -1;
401 if (CharType != static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nStartPos, 0)))) return -1;
403 sal_uInt32 ch=0;
404 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen && CharType == static_cast<sal_Int16>(u_charType(ch))) {}
405 return nStartPos; // end of char block is exclusive
408 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharBlock( const OUString& Text, sal_Int32 nStartPos,
409 const Locale& /*rLocale*/, sal_Int16 CharType )
411 if (CharType == CharType::ANY_CHAR) return -1;
412 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
414 sal_Int16 numberOfChange = (CharType == static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nStartPos, 0)))) ? 2 : 1;
415 sal_Int32 strLen = Text.getLength();
417 sal_uInt32 ch=0;
418 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
419 if ((CharType != static_cast<sal_Int16>(u_charType(ch))) != (numberOfChange == 1))
420 numberOfChange--;
422 return numberOfChange == 0 ? nStartPos : -1;
425 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharBlock( const OUString& Text, sal_Int32 nStartPos,
426 const Locale& /*rLocale*/, sal_Int16 CharType )
428 if(CharType == CharType::ANY_CHAR) return -1;
429 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
431 sal_Int16 numberOfChange = (CharType == static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nStartPos, 0)))) ? 3 : 2;
433 sal_uInt32 ch=0;
434 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
435 if (((numberOfChange % 2) == 0) != (CharType != static_cast<sal_Int16>(u_charType(ch))))
436 numberOfChange--;
437 if (nStartPos == 0 && numberOfChange > 0) {
438 numberOfChange--;
439 if (numberOfChange == 0) return nStartPos;
442 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
446 sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/,
447 sal_Int32 /*nPos*/, const Locale& /*rLocale*/ )
449 return 0;
452 namespace
454 sal_Int16 getScriptClassByUAX24Script(sal_uInt32 currentChar)
456 int32_t script = u_getIntPropertyValue(currentChar, UCHAR_SCRIPT);
457 return unicode::getScriptClassFromUScriptCode(static_cast<UScriptCode>(script));
460 struct UBlock2Script
462 UBlockCode from;
463 UBlockCode to;
464 sal_Int16 script;
467 const UBlock2Script scriptList[] =
469 {UBLOCK_NO_BLOCK, UBLOCK_NO_BLOCK, ScriptType::WEAK},
470 {UBLOCK_BASIC_LATIN, UBLOCK_SPACING_MODIFIER_LETTERS, ScriptType::LATIN},
471 {UBLOCK_GREEK, UBLOCK_ARMENIAN, ScriptType::LATIN},
472 {UBLOCK_HEBREW, UBLOCK_MYANMAR, ScriptType::COMPLEX},
473 {UBLOCK_GEORGIAN, UBLOCK_GEORGIAN, ScriptType::LATIN},
474 {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO, ScriptType::ASIAN},
475 {UBLOCK_ETHIOPIC, UBLOCK_ETHIOPIC, ScriptType::COMPLEX},
476 {UBLOCK_CHEROKEE, UBLOCK_RUNIC, ScriptType::LATIN},
477 {UBLOCK_KHMER, UBLOCK_MONGOLIAN, ScriptType::COMPLEX},
478 {UBLOCK_LATIN_EXTENDED_ADDITIONAL, UBLOCK_GREEK_EXTENDED, ScriptType::LATIN},
479 {UBLOCK_NUMBER_FORMS, UBLOCK_NUMBER_FORMS, ScriptType::WEAK},
480 {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES, ScriptType::ASIAN},
481 {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, ScriptType::ASIAN},
482 {UBLOCK_ARABIC_PRESENTATION_FORMS_A, UBLOCK_ARABIC_PRESENTATION_FORMS_A, ScriptType::COMPLEX},
483 {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS, ScriptType::ASIAN},
484 {UBLOCK_ARABIC_PRESENTATION_FORMS_B, UBLOCK_ARABIC_PRESENTATION_FORMS_B, ScriptType::COMPLEX},
485 {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, ScriptType::ASIAN},
486 {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, ScriptType::ASIAN},
487 {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES, ScriptType::ASIAN},
488 {UBLOCK_LATIN_EXTENDED_C, UBLOCK_LATIN_EXTENDED_D, ScriptType::LATIN}
491 #define scriptListCount SAL_N_ELEMENTS(scriptList)
493 //always sets rScriptType
495 //returns true for characters historically explicitly assigned to
496 //latin/weak/asian
498 //returns false for characters that historically implicitly assigned to
499 //weak as unknown
500 bool getCompatibilityScriptClassByBlock(sal_uInt32 currentChar, sal_Int16 &rScriptType)
502 bool bKnown = true;
503 //handle specific characters always as weak:
504 // 0x01 - this breaks a word
505 // 0x02 - this can be inside a word
506 // 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
507 if( 0x01 == currentChar || 0x02 == currentChar || 0x20 == currentChar || 0xA0 == currentChar)
508 rScriptType = ScriptType::WEAK;
509 // Few Spacing Modifier Letters that can be Bopomofo tonal marks.
510 else if ( 0x2CA == currentChar || 0x2CB == currentChar || 0x2C7 == currentChar || 0x2D9 == currentChar )
511 rScriptType = ScriptType::WEAK;
512 // tdf#52577 superscript numbers should be we weak.
513 else if ( 0xB2 == currentChar || 0xB3 == currentChar || 0xB9 == currentChar )
514 rScriptType = ScriptType::WEAK;
515 // workaround for Coptic
516 else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar)
517 rScriptType = ScriptType::LATIN;
518 else
520 UBlockCode block=ublock_getCode(currentChar);
521 size_t i = 0;
522 while (i < scriptListCount)
524 if (block <= scriptList[i].to)
525 break;
526 ++i;
528 if (i < scriptListCount && block >= scriptList[i].from)
529 rScriptType = scriptList[i].script;
530 else
532 rScriptType = ScriptType::WEAK;
533 bKnown = false;
536 return bKnown;
540 sal_Int16 BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar)
542 static sal_uInt32 lastChar = 0;
543 static sal_Int16 nRet = ScriptType::WEAK;
545 if (currentChar != lastChar)
547 lastChar = currentChar;
549 if (!getCompatibilityScriptClassByBlock(currentChar, nRet))
550 nRet = getScriptClassByUAX24Script(currentChar);
553 return nRet;
556 bool BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString& aLocaleName)
558 // to share service between same Language but different Country code, like zh_CN and zh_TW
559 for (const lookupTableItem& listItem : lookupTable) {
560 if (aLocaleName == listItem.aLocale.Language) {
561 xBI = listItem.xBI;
562 return true;
566 #if !WITH_LOCALE_ALL && !WITH_LOCALE_ja
567 if (aLocaleName == "ja")
568 return false;
569 #endif
570 #if !WITH_LOCALE_ALL && !WITH_LOCALE_zh
571 if (aLocaleName == "zh" || aLocaleName == "zh_TW")
572 return false;
573 #endif
574 #if !WITH_LOCALE_ALL && !WITH_LOCALE_ko
575 if (aLocaleName == "ko")
576 return false;
577 #endif
579 Reference < uno::XInterface > xI = m_xContext->getServiceManager()->createInstanceWithContext(
580 "com.sun.star.i18n.BreakIterator_" + aLocaleName, m_xContext);
582 if ( xI.is() ) {
583 xBI.set(xI, UNO_QUERY);
584 if (xBI.is()) {
585 lookupTable.emplace_back(Locale(aLocaleName, aLocaleName, aLocaleName), xBI);
586 return true;
589 return false;
592 const Reference < XBreakIterator > &
593 BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale& rLocale)
595 if (xBI.is() && rLocale == aLocale)
596 return xBI;
597 else if (m_xContext.is()) {
598 aLocale = rLocale;
600 for (const lookupTableItem& listItem : lookupTable) {
601 if (rLocale == listItem.aLocale)
603 xBI = listItem.xBI;
604 return xBI;
608 static constexpr OUString under(u"_"_ustr);
610 sal_Int32 l = rLocale.Language.getLength();
611 sal_Int32 c = rLocale.Country.getLength();
612 sal_Int32 v = rLocale.Variant.getLength();
614 if ((l > 0 && c > 0 && v > 0 &&
615 // load service with name <base>_<lang>_<country>_<variant>
616 createLocaleSpecificBreakIterator(rLocale.Language + under +
617 rLocale.Country + under + rLocale.Variant)) ||
618 (l > 0 && c > 0 &&
619 // load service with name <base>_<lang>_<country>
620 createLocaleSpecificBreakIterator(rLocale.Language + under +
621 rLocale.Country)) ||
622 (l > 0 && c > 0 && rLocale.Language == "zh" &&
623 (rLocale.Country == "HK" ||
624 rLocale.Country == "MO" ) &&
625 // if the country code is HK or MO, one more step to try TW.
626 createLocaleSpecificBreakIterator(rLocale.Language + under +
627 "TW")) ||
628 (l > 0 &&
629 // load service with name <base>_<lang>
630 createLocaleSpecificBreakIterator(rLocale.Language)) ||
631 // load default service with name <base>_Unicode
632 createLocaleSpecificBreakIterator(u"Unicode"_ustr)) {
633 lookupTable.emplace_back( aLocale, xBI );
634 return xBI;
637 throw RuntimeException(u"getLocaleSpecificBreakIterator: iterator not found"_ustr);
640 OUString SAL_CALL
641 BreakIteratorImpl::getImplementationName()
643 return u"com.sun.star.i18n.BreakIterator"_ustr;
646 sal_Bool SAL_CALL
647 BreakIteratorImpl::supportsService(const OUString& rServiceName)
649 return cppu::supportsService(this, rServiceName);
652 Sequence< OUString > SAL_CALL
653 BreakIteratorImpl::getSupportedServiceNames()
655 return { u"com.sun.star.i18n.BreakIterator"_ustr };
660 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
661 com_sun_star_i18n_BreakIterator_get_implementation(
662 css::uno::XComponentContext *context,
663 css::uno::Sequence<css::uno::Any> const &)
665 return cppu::acquire(new i18npool::BreakIteratorImpl(context));
668 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */