1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <config_locales.h>
22 #include <breakiteratorImpl.hxx>
23 #include <cppuhelper/supportsservice.hxx>
24 #include <unicode/uchar.h>
25 #include <i18nutil/unicode.hxx>
26 #include <o3tl/string_view.hxx>
28 #include <com/sun/star/i18n/CharType.hpp>
29 #include <com/sun/star/i18n/ScriptType.hpp>
30 #include <com/sun/star/i18n/WordType.hpp>
31 #include <com/sun/star/uno/XComponentContext.hpp>
33 using namespace ::com::sun::star
;
34 using namespace ::com::sun::star::uno
;
35 using namespace ::com::sun::star::i18n
;
36 using namespace ::com::sun::star::lang
;
40 BreakIteratorImpl::BreakIteratorImpl( const Reference
< XComponentContext
>& rxContext
) : m_xContext( rxContext
)
44 BreakIteratorImpl::BreakIteratorImpl()
48 BreakIteratorImpl::~BreakIteratorImpl()
52 #define LBI getLocaleSpecificBreakIterator(rLocale)
54 sal_Int32 SAL_CALL
BreakIteratorImpl::nextCharacters( const OUString
& Text
, sal_Int32 nStartPos
,
55 const Locale
&rLocale
, sal_Int16 nCharacterIteratorMode
, sal_Int32 nCount
, sal_Int32
& nDone
)
58 throw RuntimeException("BreakIteratorImpl::nextCharacters: expected nCount >=0, got "
59 + OUString::number(nCount
));
61 return LBI
->nextCharacters( Text
, nStartPos
, rLocale
, nCharacterIteratorMode
, nCount
, nDone
);
64 sal_Int32 SAL_CALL
BreakIteratorImpl::previousCharacters( const OUString
& Text
, sal_Int32 nStartPos
,
65 const Locale
& rLocale
, sal_Int16 nCharacterIteratorMode
, sal_Int32 nCount
, sal_Int32
& nDone
)
68 throw RuntimeException("BreakIteratorImpl::previousCharacters: expected nCount >=0, got "
69 + OUString::number(nCount
));
71 return LBI
->previousCharacters( Text
, nStartPos
, rLocale
, nCharacterIteratorMode
, nCount
, nDone
);
// True if the code point c is U+200B (ZERO WIDTH SPACE).
// The macro tests its own (parenthesized) argument instead of silently
// relying on a variable that happens to be called "ch" at the point of use.
#define isZWSP(c) ((c) == 0x200B)
76 static sal_Int32
skipSpace(std::u16string_view Text
, sal_Int32 nPos
, sal_Int32 len
, sal_Int16 rWordType
, bool bDirection
)
81 case WordType::ANYWORD_IGNOREWHITESPACES
:
82 case WordType::WORD_COUNT
:
86 ch
= o3tl::iterateCodePoints(Text
, &pos
);
87 if (!u_isUWhiteSpace(ch
) && !isZWSP(ch
))
94 ch
= o3tl::iterateCodePoints(Text
, &pos
, -1);
95 if (!u_isUWhiteSpace(ch
) && !isZWSP(ch
))
100 case WordType::DICTIONARY_WORD
:
104 ch
= o3tl::iterateCodePoints(Text
, &pos
);
105 if (!u_isWhitespace(ch
) && !isZWSP(ch
) && (ch
== 0x002E || u_isalnum(ch
)))
112 ch
= o3tl::iterateCodePoints(Text
, &pos
, -1);
113 if (!u_isWhitespace(ch
) && !isZWSP(ch
) && (ch
== 0x002E || u_isalnum(ch
)))
122 Boundary SAL_CALL
BreakIteratorImpl::nextWord( const OUString
& Text
, sal_Int32 nStartPos
,
123 const Locale
& rLocale
, sal_Int16 rWordType
)
125 sal_Int32 len
= Text
.getLength();
126 if( nStartPos
< 0 || len
== 0 )
127 result
.endPos
= result
.startPos
= 0;
128 else if (nStartPos
>= len
)
129 result
.endPos
= result
.startPos
= len
;
131 result
= LBI
->nextWord(Text
, nStartPos
, rLocale
, rWordType
);
133 nStartPos
= skipSpace(Text
, result
.startPos
, len
, rWordType
, true);
135 if ( nStartPos
!= result
.startPos
) {
136 if( nStartPos
>= len
)
137 result
.startPos
= result
.endPos
= len
;
139 result
= LBI
->getWordBoundary(Text
, nStartPos
, rLocale
, rWordType
, true);
140 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts
141 if (result
.startPos
< nStartPos
) result
.startPos
= nStartPos
;
148 static bool isCJK( const Locale
& rLocale
) {
149 return rLocale
.Language
== "zh" || rLocale
.Language
== "ja" || rLocale
.Language
== "ko";
152 Boundary SAL_CALL
BreakIteratorImpl::previousWord( const OUString
& Text
, sal_Int32 nStartPos
,
153 const Locale
& rLocale
, sal_Int16 rWordType
)
155 sal_Int32 len
= Text
.getLength();
156 if( nStartPos
<= 0 || len
== 0 ) {
157 result
.endPos
= result
.startPos
= 0;
159 } else if (nStartPos
> len
) {
160 result
.endPos
= result
.startPos
= len
;
164 sal_Int32 nPos
= skipSpace(Text
, nStartPos
, len
, rWordType
, false);
166 // if some spaces are skipped, and the script type is Asian with no CJK rLocale, we have to return
167 // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary.
168 result
.startPos
= nPos
;
169 if (nPos
!= nStartPos
&& nPos
> 0 && !isCJK(rLocale
) && getScriptClass(Text
.iterateCodePoints(&nPos
, -1)) == ScriptType::ASIAN
) {
174 return LBI
->previousWord(Text
, result
.startPos
, rLocale
, rWordType
);
178 Boundary SAL_CALL
BreakIteratorImpl::getWordBoundary( const OUString
& Text
, sal_Int32 nPos
, const Locale
& rLocale
,
179 sal_Int16 rWordType
, sal_Bool bDirection
)
181 sal_Int32 len
= Text
.getLength();
182 if( nPos
< 0 || len
== 0 )
183 result
.endPos
= result
.startPos
= 0;
185 result
.endPos
= result
.startPos
= len
;
187 sal_Int32 next
, prev
;
188 next
= skipSpace(Text
, nPos
, len
, rWordType
, true);
189 prev
= skipSpace(Text
, nPos
, len
, rWordType
, false);
190 if (prev
== 0 && next
== len
) {
191 result
.endPos
= result
.startPos
= nPos
;
192 } else if (prev
== 0 && ! bDirection
) {
193 result
.endPos
= result
.startPos
= 0;
194 } else if (next
== len
&& bDirection
) {
195 result
.endPos
= result
.startPos
= len
;
198 if (next
== nPos
&& next
!= len
)
200 else if (prev
== nPos
&& prev
!= 0)
203 nPos
= bDirection
? next
: prev
;
205 result
= LBI
->getWordBoundary(Text
, nPos
, rLocale
, rWordType
, bDirection
);
211 sal_Bool SAL_CALL
BreakIteratorImpl::isBeginWord( const OUString
& Text
, sal_Int32 nPos
,
212 const Locale
& rLocale
, sal_Int16 rWordType
)
214 sal_Int32 len
= Text
.getLength();
216 if (nPos
< 0 || nPos
>= len
) return false;
218 sal_Int32 tmp
= skipSpace(Text
, nPos
, len
, rWordType
, true);
220 if (tmp
!= nPos
) return false;
222 result
= getWordBoundary(Text
, nPos
, rLocale
, rWordType
, true);
224 return result
.startPos
== nPos
;
227 sal_Bool SAL_CALL
BreakIteratorImpl::isEndWord( const OUString
& Text
, sal_Int32 nPos
,
228 const Locale
& rLocale
, sal_Int16 rWordType
)
230 sal_Int32 len
= Text
.getLength();
232 if (nPos
<= 0 || nPos
> len
) return false;
234 sal_Int32 tmp
= skipSpace(Text
, nPos
, len
, rWordType
, false);
236 if (tmp
!= nPos
) return false;
238 result
= getWordBoundary(Text
, nPos
, rLocale
, rWordType
, false);
240 return result
.endPos
== nPos
;
243 sal_Int32 SAL_CALL
BreakIteratorImpl::beginOfSentence( const OUString
& Text
, sal_Int32 nStartPos
,
244 const Locale
&rLocale
)
246 if (nStartPos
< 0 || nStartPos
> Text
.getLength())
248 if (Text
.isEmpty()) return 0;
249 return LBI
->beginOfSentence(Text
, nStartPos
, rLocale
);
252 sal_Int32 SAL_CALL
BreakIteratorImpl::endOfSentence( const OUString
& Text
, sal_Int32 nStartPos
,
253 const Locale
&rLocale
)
255 if (nStartPos
< 0 || nStartPos
> Text
.getLength())
257 if (Text
.isEmpty()) return 0;
258 return LBI
->endOfSentence(Text
, nStartPos
, rLocale
);
261 LineBreakResults SAL_CALL
BreakIteratorImpl::getLineBreak( const OUString
& Text
, sal_Int32 nStartPos
,
262 const Locale
& rLocale
, sal_Int32 nMinBreakPos
, const LineBreakHyphenationOptions
& hOptions
,
263 const LineBreakUserOptions
& bOptions
)
265 return LBI
->getLineBreak(Text
, nStartPos
, rLocale
, nMinBreakPos
, hOptions
, bOptions
);
268 sal_Int16 SAL_CALL
BreakIteratorImpl::getScriptType( const OUString
& Text
, sal_Int32 nPos
)
270 return (nPos
< 0 || nPos
>= Text
.getLength()) ? ScriptType::WEAK
:
271 getScriptClass(Text
.iterateCodePoints(&nPos
, 0));
275 /** Increments/decrements position first, then obtains character.
276 @return current position, may be -1 or text length if string was consumed.
278 static sal_Int32
iterateCodePoints(const OUString
& Text
, sal_Int32
&nStartPos
, sal_Int32 inc
, sal_uInt32
& ch
) {
279 sal_Int32 nLen
= Text
.getLength();
280 if (nStartPos
+ inc
< 0 || nStartPos
+ inc
>= nLen
) {
282 nStartPos
= nStartPos
+ inc
< 0 ? -1 : nLen
;
284 ch
= Text
.iterateCodePoints(&nStartPos
, inc
);
286 // erAck: 2009-06-30T21:52+0200 This logic looks somewhat
287 // suspicious as if it cures a symptom... anyway, had to add
288 // nStartPos < Text.getLength() to silence the (correct) assertion
289 // in rtl_uString_iterateCodePoints() if Text was one character
290 // (codepoint) only, made up of a surrogate pair.
291 //if (inc > 0 && nStartPos < Text.getLength())
292 // ch = Text.iterateCodePoints(&nStartPos, 0);
293 // With surrogates, nStartPos may actually point behind string
294 // now, even if inc is only +1
296 ch
= (nStartPos
< nLen
? Text
.iterateCodePoints(&nStartPos
, 0) : 0);
302 sal_Int32 SAL_CALL
BreakIteratorImpl::beginOfScript( const OUString
& Text
,
303 sal_Int32 nStartPos
, sal_Int16 ScriptType
)
305 if (nStartPos
< 0 || nStartPos
>= Text
.getLength())
308 if(ScriptType
!= getScriptClass(Text
.iterateCodePoints(&nStartPos
, 0)))
311 if (nStartPos
== 0) return 0;
313 while (iterateCodePoints(Text
, nStartPos
, -1, ch
) >= 0 && ScriptType
== getScriptClass(ch
)) {
314 if (nStartPos
== 0) return 0;
317 return iterateCodePoints(Text
, nStartPos
, 1, ch
);
320 sal_Int32 SAL_CALL
BreakIteratorImpl::endOfScript( const OUString
& Text
,
321 sal_Int32 nStartPos
, sal_Int16 ScriptType
)
323 if (nStartPos
< 0 || nStartPos
>= Text
.getLength())
326 if(ScriptType
!= getScriptClass(Text
.iterateCodePoints(&nStartPos
, 0)))
329 sal_Int32 strLen
= Text
.getLength();
331 while(iterateCodePoints(Text
, nStartPos
, 1, ch
) < strLen
) {
332 sal_Int16 currentCharScriptType
= getScriptClass(ch
);
333 if(ScriptType
!= currentCharScriptType
&& currentCharScriptType
!= ScriptType::WEAK
)
339 sal_Int32 SAL_CALL
BreakIteratorImpl::previousScript( const OUString
& Text
,
340 sal_Int32 nStartPos
, sal_Int16 ScriptType
)
344 if (nStartPos
> Text
.getLength())
345 nStartPos
= Text
.getLength();
347 sal_Int16 numberOfChange
= (ScriptType
== getScriptClass(Text
.iterateCodePoints(&nStartPos
, 0))) ? 3 : 2;
350 while (numberOfChange
> 0 && iterateCodePoints(Text
, nStartPos
, -1, ch
) >= 0) {
351 if (((numberOfChange
% 2) == 0) != (ScriptType
!= getScriptClass(ch
)))
353 else if (nStartPos
== 0) {
357 return numberOfChange
== 0 ? iterateCodePoints(Text
, nStartPos
, 1, ch
) : -1;
360 sal_Int32 SAL_CALL
BreakIteratorImpl::nextScript( const OUString
& Text
, sal_Int32 nStartPos
,
361 sal_Int16 ScriptType
)
366 sal_Int32 strLen
= Text
.getLength();
367 if (nStartPos
>= strLen
)
370 sal_Int16 numberOfChange
= (ScriptType
== getScriptClass(Text
.iterateCodePoints(&nStartPos
, 0))) ? 2 : 1;
373 while (numberOfChange
> 0 && iterateCodePoints(Text
, nStartPos
, 1, ch
) < strLen
) {
374 sal_Int16 currentCharScriptType
= getScriptClass(ch
);
375 if ((numberOfChange
== 1) ? (ScriptType
== currentCharScriptType
) :
376 (ScriptType
!= currentCharScriptType
&& currentCharScriptType
!= ScriptType::WEAK
))
379 return numberOfChange
== 0 ? nStartPos
: -1;
382 sal_Int32 SAL_CALL
BreakIteratorImpl::beginOfCharBlock( const OUString
& Text
, sal_Int32 nStartPos
,
383 const Locale
& /*rLocale*/, sal_Int16 CharType
)
385 if (CharType
== CharType::ANY_CHAR
) return 0;
386 if (nStartPos
< 0 || nStartPos
>= Text
.getLength()) return -1;
387 if (CharType
!= static_cast<sal_Int16
>(u_charType( Text
.iterateCodePoints(&nStartPos
, 0)))) return -1;
389 sal_Int32 nPos
=nStartPos
;
390 while(nStartPos
> 0 && CharType
== static_cast<sal_Int16
>(u_charType(Text
.iterateCodePoints(&nPos
, -1)))) { nStartPos
=nPos
; }
391 return nStartPos
; // begin of char block is inclusive
394 sal_Int32 SAL_CALL
BreakIteratorImpl::endOfCharBlock( const OUString
& Text
, sal_Int32 nStartPos
,
395 const Locale
& /*rLocale*/, sal_Int16 CharType
)
397 sal_Int32 strLen
= Text
.getLength();
399 if (CharType
== CharType::ANY_CHAR
) return strLen
; // end of char block is exclusive
400 if (nStartPos
< 0 || nStartPos
>= strLen
) return -1;
401 if (CharType
!= static_cast<sal_Int16
>(u_charType(Text
.iterateCodePoints(&nStartPos
, 0)))) return -1;
404 while(iterateCodePoints(Text
, nStartPos
, 1, ch
) < strLen
&& CharType
== static_cast<sal_Int16
>(u_charType(ch
))) {}
405 return nStartPos
; // end of char block is exclusive
408 sal_Int32 SAL_CALL
BreakIteratorImpl::nextCharBlock( const OUString
& Text
, sal_Int32 nStartPos
,
409 const Locale
& /*rLocale*/, sal_Int16 CharType
)
411 if (CharType
== CharType::ANY_CHAR
) return -1;
412 if (nStartPos
< 0 || nStartPos
>= Text
.getLength()) return -1;
414 sal_Int16 numberOfChange
= (CharType
== static_cast<sal_Int16
>(u_charType(Text
.iterateCodePoints(&nStartPos
, 0)))) ? 2 : 1;
415 sal_Int32 strLen
= Text
.getLength();
418 while (numberOfChange
> 0 && iterateCodePoints(Text
, nStartPos
, 1, ch
) < strLen
) {
419 if ((CharType
!= static_cast<sal_Int16
>(u_charType(ch
))) != (numberOfChange
== 1))
422 return numberOfChange
== 0 ? nStartPos
: -1;
425 sal_Int32 SAL_CALL
BreakIteratorImpl::previousCharBlock( const OUString
& Text
, sal_Int32 nStartPos
,
426 const Locale
& /*rLocale*/, sal_Int16 CharType
)
428 if(CharType
== CharType::ANY_CHAR
) return -1;
429 if (nStartPos
< 0 || nStartPos
>= Text
.getLength()) return -1;
431 sal_Int16 numberOfChange
= (CharType
== static_cast<sal_Int16
>(u_charType(Text
.iterateCodePoints(&nStartPos
, 0)))) ? 3 : 2;
434 while (numberOfChange
> 0 && iterateCodePoints(Text
, nStartPos
, -1, ch
) >= 0) {
435 if (((numberOfChange
% 2) == 0) != (CharType
!= static_cast<sal_Int16
>(u_charType(ch
))))
437 if (nStartPos
== 0 && numberOfChange
> 0) {
439 if (numberOfChange
== 0) return nStartPos
;
442 return numberOfChange
== 0 ? iterateCodePoints(Text
, nStartPos
, 1, ch
) : -1;
446 sal_Int16 SAL_CALL
BreakIteratorImpl::getWordType( const OUString
& /*Text*/,
447 sal_Int32
/*nPos*/, const Locale
& /*rLocale*/ )
454 sal_Int16
getScriptClassByUAX24Script(sal_uInt32 currentChar
)
456 int32_t script
= u_getIntPropertyValue(currentChar
, UCHAR_SCRIPT
);
457 return unicode::getScriptClassFromUScriptCode(static_cast<UScriptCode
>(script
));
467 const UBlock2Script scriptList
[] =
469 {UBLOCK_NO_BLOCK
, UBLOCK_NO_BLOCK
, ScriptType::WEAK
},
470 {UBLOCK_BASIC_LATIN
, UBLOCK_SPACING_MODIFIER_LETTERS
, ScriptType::LATIN
},
471 {UBLOCK_GREEK
, UBLOCK_ARMENIAN
, ScriptType::LATIN
},
472 {UBLOCK_HEBREW
, UBLOCK_MYANMAR
, ScriptType::COMPLEX
},
473 {UBLOCK_GEORGIAN
, UBLOCK_GEORGIAN
, ScriptType::LATIN
},
474 {UBLOCK_HANGUL_JAMO
, UBLOCK_HANGUL_JAMO
, ScriptType::ASIAN
},
475 {UBLOCK_ETHIOPIC
, UBLOCK_ETHIOPIC
, ScriptType::COMPLEX
},
476 {UBLOCK_CHEROKEE
, UBLOCK_RUNIC
, ScriptType::LATIN
},
477 {UBLOCK_KHMER
, UBLOCK_MONGOLIAN
, ScriptType::COMPLEX
},
478 {UBLOCK_LATIN_EXTENDED_ADDITIONAL
, UBLOCK_GREEK_EXTENDED
, ScriptType::LATIN
},
479 {UBLOCK_NUMBER_FORMS
, UBLOCK_NUMBER_FORMS
, ScriptType::WEAK
},
480 {UBLOCK_CJK_RADICALS_SUPPLEMENT
, UBLOCK_HANGUL_SYLLABLES
, ScriptType::ASIAN
},
481 {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS
, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS
, ScriptType::ASIAN
},
482 {UBLOCK_ARABIC_PRESENTATION_FORMS_A
, UBLOCK_ARABIC_PRESENTATION_FORMS_A
, ScriptType::COMPLEX
},
483 {UBLOCK_CJK_COMPATIBILITY_FORMS
, UBLOCK_CJK_COMPATIBILITY_FORMS
, ScriptType::ASIAN
},
484 {UBLOCK_ARABIC_PRESENTATION_FORMS_B
, UBLOCK_ARABIC_PRESENTATION_FORMS_B
, ScriptType::COMPLEX
},
485 {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS
, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS
, ScriptType::ASIAN
},
486 {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
, ScriptType::ASIAN
},
487 {UBLOCK_CJK_STROKES
, UBLOCK_CJK_STROKES
, ScriptType::ASIAN
},
488 {UBLOCK_LATIN_EXTENDED_C
, UBLOCK_LATIN_EXTENDED_D
, ScriptType::LATIN
}
491 #define scriptListCount SAL_N_ELEMENTS(scriptList)
493 //always sets rScriptType
495 //returns true for characters historically explicitly assigned to
498 //returns false for characters that historically implicitly assigned to
500 bool getCompatibilityScriptClassByBlock(sal_uInt32 currentChar
, sal_Int16
&rScriptType
)
503 //handle specific characters always as weak:
504 // 0x01 - this breaks a word
505 // 0x02 - this can be inside a word
506 // 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
507 if( 0x01 == currentChar
|| 0x02 == currentChar
|| 0x20 == currentChar
|| 0xA0 == currentChar
)
508 rScriptType
= ScriptType::WEAK
;
509 // Few Spacing Modifier Letters that can be Bopomofo tonal marks.
510 else if ( 0x2CA == currentChar
|| 0x2CB == currentChar
|| 0x2C7 == currentChar
|| 0x2D9 == currentChar
)
511 rScriptType
= ScriptType::WEAK
;
512 // tdf#52577 superscript numbers should be weak.
513 else if ( 0xB2 == currentChar
|| 0xB3 == currentChar
|| 0xB9 == currentChar
)
514 rScriptType
= ScriptType::WEAK
;
515 // workaround for Coptic
516 else if ( 0x2C80 <= currentChar
&& 0x2CE3 >= currentChar
)
517 rScriptType
= ScriptType::LATIN
;
520 UBlockCode block
=ublock_getCode(currentChar
);
522 while (i
< scriptListCount
)
524 if (block
<= scriptList
[i
].to
)
528 if (i
< scriptListCount
&& block
>= scriptList
[i
].from
)
529 rScriptType
= scriptList
[i
].script
;
532 rScriptType
= ScriptType::WEAK
;
540 sal_Int16
BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar
)
542 static sal_uInt32 lastChar
= 0;
543 static sal_Int16 nRet
= ScriptType::WEAK
;
545 if (currentChar
!= lastChar
)
547 lastChar
= currentChar
;
549 if (!getCompatibilityScriptClassByBlock(currentChar
, nRet
))
550 nRet
= getScriptClassByUAX24Script(currentChar
);
556 bool BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString
& aLocaleName
)
558 // to share service between same Language but different Country code, like zh_CN and zh_TW
559 for (const lookupTableItem
& listItem
: lookupTable
) {
560 if (aLocaleName
== listItem
.aLocale
.Language
) {
566 #if !WITH_LOCALE_ALL && !WITH_LOCALE_ja
567 if (aLocaleName
== "ja")
570 #if !WITH_LOCALE_ALL && !WITH_LOCALE_zh
571 if (aLocaleName
== "zh" || aLocaleName
== "zh_TW")
574 #if !WITH_LOCALE_ALL && !WITH_LOCALE_ko
575 if (aLocaleName
== "ko")
579 Reference
< uno::XInterface
> xI
= m_xContext
->getServiceManager()->createInstanceWithContext(
580 "com.sun.star.i18n.BreakIterator_" + aLocaleName
, m_xContext
);
583 xBI
.set(xI
, UNO_QUERY
);
585 lookupTable
.emplace_back(Locale(aLocaleName
, aLocaleName
, aLocaleName
), xBI
);
592 const Reference
< XBreakIterator
> &
593 BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale
& rLocale
)
595 if (xBI
.is() && rLocale
== aLocale
)
597 else if (m_xContext
.is()) {
600 for (const lookupTableItem
& listItem
: lookupTable
) {
601 if (rLocale
== listItem
.aLocale
)
608 static constexpr OUString
under(u
"_"_ustr
);
610 sal_Int32 l
= rLocale
.Language
.getLength();
611 sal_Int32 c
= rLocale
.Country
.getLength();
612 sal_Int32 v
= rLocale
.Variant
.getLength();
614 if ((l
> 0 && c
> 0 && v
> 0 &&
615 // load service with name <base>_<lang>_<country>_<variant>
616 createLocaleSpecificBreakIterator(rLocale
.Language
+ under
+
617 rLocale
.Country
+ under
+ rLocale
.Variant
)) ||
619 // load service with name <base>_<lang>_<country>
620 createLocaleSpecificBreakIterator(rLocale
.Language
+ under
+
622 (l
> 0 && c
> 0 && rLocale
.Language
== "zh" &&
623 (rLocale
.Country
== "HK" ||
624 rLocale
.Country
== "MO" ) &&
625 // if the country code is HK or MO, one more step to try TW.
626 createLocaleSpecificBreakIterator(rLocale
.Language
+ under
+
629 // load service with name <base>_<lang>
630 createLocaleSpecificBreakIterator(rLocale
.Language
)) ||
631 // load default service with name <base>_Unicode
632 createLocaleSpecificBreakIterator(u
"Unicode"_ustr
)) {
633 lookupTable
.emplace_back( aLocale
, xBI
);
637 throw RuntimeException(u
"getLocaleSpecificBreakIterator: iterator not found"_ustr
);
641 BreakIteratorImpl::getImplementationName()
643 return u
"com.sun.star.i18n.BreakIterator"_ustr
;
647 BreakIteratorImpl::supportsService(const OUString
& rServiceName
)
649 return cppu::supportsService(this, rServiceName
);
652 Sequence
< OUString
> SAL_CALL
653 BreakIteratorImpl::getSupportedServiceNames()
655 return { u
"com.sun.star.i18n.BreakIterator"_ustr
};
660 extern "C" SAL_DLLPUBLIC_EXPORT
css::uno::XInterface
*
661 com_sun_star_i18n_BreakIterator_get_implementation(
662 css::uno::XComponentContext
*context
,
663 css::uno::Sequence
<css::uno::Any
> const &)
665 return cppu::acquire(new i18npool::BreakIteratorImpl(context
));
668 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */