nss: upgrade to release 3.73
[LibreOffice.git] / i18npool / source / breakiterator / breakiteratorImpl.cxx
blob11149be7558f3923fcd1149e586468abb37bf46e
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #include <config_locales.h>
21 #include <breakiteratorImpl.hxx>
22 #include <cppuhelper/supportsservice.hxx>
23 #include <unicode/uchar.h>
24 #include <i18nutil/unicode.hxx>
26 #include <com/sun/star/i18n/CharType.hpp>
27 #include <com/sun/star/i18n/ScriptType.hpp>
28 #include <com/sun/star/i18n/WordType.hpp>
29 #include <com/sun/star/uno/XComponentContext.hpp>
31 using namespace ::com::sun::star;
32 using namespace ::com::sun::star::uno;
33 using namespace ::com::sun::star::i18n;
34 using namespace ::com::sun::star::lang;
36 namespace i18npool {
38 BreakIteratorImpl::BreakIteratorImpl( const Reference < XComponentContext >& rxContext ) : m_xContext( rxContext )
42 BreakIteratorImpl::BreakIteratorImpl()
46 BreakIteratorImpl::~BreakIteratorImpl()
// Shorthand for "the break iterator responsible for rLocale". It expands to a
// call that uses the identifier `rLocale` from the enclosing scope, so it can
// only be used inside functions that have a parameter/variable of that name.
#define LBI (getLocaleSpecificBreakIterator(rLocale))
52 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharacters( const OUString& Text, sal_Int32 nStartPos,
53 const Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
55 if (nCount < 0)
56 throw RuntimeException("BreakIteratorImpl::nextCharacters: expected nCount >=0, got "
57 + OUString::number(nCount));
59 return LBI->nextCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
62 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharacters( const OUString& Text, sal_Int32 nStartPos,
63 const Locale& rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
65 if (nCount < 0)
66 throw RuntimeException("BreakIteratorImpl::previousCharacters: expected nCount >=0, got "
67 + OUString::number(nCount));
69 return LBI->previousCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
// True when the code point `c` is U+200B ZERO WIDTH SPACE.
// The macro tests its own argument (parenthesised), not whatever variable
// happens to be called `ch` at the point of use.
#define isZWSP(c) ((c) == 0x200B)
74 static sal_Int32 skipSpace(const OUString& Text, sal_Int32 nPos, sal_Int32 len, sal_Int16 rWordType, bool bDirection)
76 sal_uInt32 ch=0;
77 sal_Int32 pos=nPos;
78 switch (rWordType) {
79 case WordType::ANYWORD_IGNOREWHITESPACES:
80 if (bDirection)
81 while (nPos < len)
83 ch = Text.iterateCodePoints(&pos);
84 if (!u_isWhitespace(ch) && !isZWSP(ch))
85 break;
86 nPos = pos;
88 else
89 while (nPos > 0)
91 ch = Text.iterateCodePoints(&pos, -1);
92 if (!u_isWhitespace(ch) && !isZWSP(ch))
93 break;
94 nPos = pos;
96 break;
97 case WordType::DICTIONARY_WORD:
98 if (bDirection)
99 while (nPos < len)
101 ch = Text.iterateCodePoints(&pos);
102 if (!u_isWhitespace(ch) && !isZWSP(ch) && (ch == 0x002E || u_isalnum(ch)))
103 break;
104 nPos = pos;
106 else
107 while (nPos > 0)
109 ch = Text.iterateCodePoints(&pos, -1);
110 if (!u_isWhitespace(ch) && !isZWSP(ch) && (ch == 0x002E || u_isalnum(ch)))
111 break;
112 nPos = pos;
114 break;
115 case WordType::WORD_COUNT:
116 if (bDirection)
117 while (nPos < len)
119 ch = Text.iterateCodePoints(&pos);
120 if (!u_isUWhiteSpace(ch) && !isZWSP(ch))
121 break;
122 nPos = pos;
124 else
125 while (nPos > 0)
127 ch = Text.iterateCodePoints(&pos, -1);
128 if (!u_isUWhiteSpace(ch) && !isZWSP(ch))
129 break;
130 nPos = pos;
132 break;
134 return nPos;
137 Boundary SAL_CALL BreakIteratorImpl::nextWord( const OUString& Text, sal_Int32 nStartPos,
138 const Locale& rLocale, sal_Int16 rWordType )
140 sal_Int32 len = Text.getLength();
141 if( nStartPos < 0 || len == 0 )
142 result.endPos = result.startPos = 0;
143 else if (nStartPos >= len)
144 result.endPos = result.startPos = len;
145 else {
146 result = LBI->nextWord(Text, nStartPos, rLocale, rWordType);
148 nStartPos = skipSpace(Text, result.startPos, len, rWordType, true);
150 if ( nStartPos != result.startPos) {
151 if( nStartPos >= len )
152 result.startPos = result.endPos = len;
153 else {
154 result = LBI->getWordBoundary(Text, nStartPos, rLocale, rWordType, true);
155 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts
156 if (result.startPos < nStartPos) result.startPos = nStartPos;
160 return result;
163 static bool isCJK( const Locale& rLocale ) {
164 return rLocale.Language == "zh" || rLocale.Language == "ja" || rLocale.Language == "ko";
167 Boundary SAL_CALL BreakIteratorImpl::previousWord( const OUString& Text, sal_Int32 nStartPos,
168 const Locale& rLocale, sal_Int16 rWordType)
170 sal_Int32 len = Text.getLength();
171 if( nStartPos <= 0 || len == 0 ) {
172 result.endPos = result.startPos = 0;
173 return result;
174 } else if (nStartPos > len) {
175 result.endPos = result.startPos = len;
176 return result;
179 sal_Int32 nPos = skipSpace(Text, nStartPos, len, rWordType, false);
181 // if some spaces are skipped, and the script type is Asian with no CJK rLocale, we have to return
182 // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary.
183 result.startPos = nPos;
184 if (nPos != nStartPos && nPos > 0 && !isCJK(rLocale) && getScriptClass(Text.iterateCodePoints(&nPos, -1)) == ScriptType::ASIAN) {
185 result.endPos = -1;
186 return result;
189 return LBI->previousWord(Text, result.startPos, rLocale, rWordType);
193 Boundary SAL_CALL BreakIteratorImpl::getWordBoundary( const OUString& Text, sal_Int32 nPos, const Locale& rLocale,
194 sal_Int16 rWordType, sal_Bool bDirection )
196 sal_Int32 len = Text.getLength();
197 if( nPos < 0 || len == 0 )
198 result.endPos = result.startPos = 0;
199 else if (nPos > len)
200 result.endPos = result.startPos = len;
201 else {
202 sal_Int32 next, prev;
203 next = skipSpace(Text, nPos, len, rWordType, true);
204 prev = skipSpace(Text, nPos, len, rWordType, false);
205 if (prev == 0 && next == len) {
206 result.endPos = result.startPos = nPos;
207 } else if (prev == 0 && ! bDirection) {
208 result.endPos = result.startPos = 0;
209 } else if (next == len && bDirection) {
210 result.endPos = result.startPos = len;
211 } else {
212 if (next != prev) {
213 if (next == nPos && next != len)
214 bDirection = true;
215 else if (prev == nPos && prev != 0)
216 bDirection = false;
217 else
218 nPos = bDirection ? next : prev;
220 result = LBI->getWordBoundary(Text, nPos, rLocale, rWordType, bDirection);
223 return result;
226 sal_Bool SAL_CALL BreakIteratorImpl::isBeginWord( const OUString& Text, sal_Int32 nPos,
227 const Locale& rLocale, sal_Int16 rWordType )
229 sal_Int32 len = Text.getLength();
231 if (nPos < 0 || nPos >= len) return false;
233 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, true);
235 if (tmp != nPos) return false;
237 result = getWordBoundary(Text, nPos, rLocale, rWordType, true);
239 return result.startPos == nPos;
242 sal_Bool SAL_CALL BreakIteratorImpl::isEndWord( const OUString& Text, sal_Int32 nPos,
243 const Locale& rLocale, sal_Int16 rWordType )
245 sal_Int32 len = Text.getLength();
247 if (nPos <= 0 || nPos > len) return false;
249 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, false);
251 if (tmp != nPos) return false;
253 result = getWordBoundary(Text, nPos, rLocale, rWordType, false);
255 return result.endPos == nPos;
258 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
259 const Locale &rLocale )
261 if (nStartPos < 0 || nStartPos > Text.getLength())
262 return -1;
263 if (Text.isEmpty()) return 0;
264 return LBI->beginOfSentence(Text, nStartPos, rLocale);
267 sal_Int32 SAL_CALL BreakIteratorImpl::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
268 const Locale &rLocale )
270 if (nStartPos < 0 || nStartPos > Text.getLength())
271 return -1;
272 if (Text.isEmpty()) return 0;
273 return LBI->endOfSentence(Text, nStartPos, rLocale);
276 LineBreakResults SAL_CALL BreakIteratorImpl::getLineBreak( const OUString& Text, sal_Int32 nStartPos,
277 const Locale& rLocale, sal_Int32 nMinBreakPos, const LineBreakHyphenationOptions& hOptions,
278 const LineBreakUserOptions& bOptions )
280 return LBI->getLineBreak(Text, nStartPos, rLocale, nMinBreakPos, hOptions, bOptions);
283 sal_Int16 SAL_CALL BreakIteratorImpl::getScriptType( const OUString& Text, sal_Int32 nPos )
285 return (nPos < 0 || nPos >= Text.getLength()) ? ScriptType::WEAK :
286 getScriptClass(Text.iterateCodePoints(&nPos, 0));
290 /** Increments/decrements position first, then obtains character.
291 @return current position, may be -1 or text length if string was consumed.
293 static sal_Int32 iterateCodePoints(const OUString& Text, sal_Int32 &nStartPos, sal_Int32 inc, sal_uInt32& ch) {
294 sal_Int32 nLen = Text.getLength();
295 if (nStartPos + inc < 0 || nStartPos + inc >= nLen) {
296 ch = 0;
297 nStartPos = nStartPos + inc < 0 ? -1 : nLen;
298 } else {
299 ch = Text.iterateCodePoints(&nStartPos, inc);
300 // Fix for #i80436#.
301 // erAck: 2009-06-30T21:52+0200 This logic looks somewhat
302 // suspicious as if it cures a symptom... anyway, had to add
303 // nStartPos < Text.getLength() to silence the (correct) assertion
304 // in rtl_uString_iterateCodePoints() if Text was one character
305 // (codepoint) only, made up of a surrogate pair.
306 //if (inc > 0 && nStartPos < Text.getLength())
307 // ch = Text.iterateCodePoints(&nStartPos, 0);
308 // With surrogates, nStartPos may actually point behind string
309 // now, even if inc is only +1
310 if (inc > 0)
311 ch = (nStartPos < nLen ? Text.iterateCodePoints(&nStartPos, 0) : 0);
313 return nStartPos;
317 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfScript( const OUString& Text,
318 sal_Int32 nStartPos, sal_Int16 ScriptType )
320 if (nStartPos < 0 || nStartPos >= Text.getLength())
321 return -1;
323 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
324 return -1;
326 if (nStartPos == 0) return 0;
327 sal_uInt32 ch=0;
328 while (iterateCodePoints(Text, nStartPos, -1, ch) >= 0 && ScriptType == getScriptClass(ch)) {
329 if (nStartPos == 0) return 0;
332 return iterateCodePoints(Text, nStartPos, 1, ch);
335 sal_Int32 SAL_CALL BreakIteratorImpl::endOfScript( const OUString& Text,
336 sal_Int32 nStartPos, sal_Int16 ScriptType )
338 if (nStartPos < 0 || nStartPos >= Text.getLength())
339 return -1;
341 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
342 return -1;
344 sal_Int32 strLen = Text.getLength();
345 sal_uInt32 ch=0;
346 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) {
347 sal_Int16 currentCharScriptType = getScriptClass(ch);
348 if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK)
349 break;
351 return nStartPos;
354 sal_Int32 SAL_CALL BreakIteratorImpl::previousScript( const OUString& Text,
355 sal_Int32 nStartPos, sal_Int16 ScriptType )
357 if (nStartPos < 0)
358 return -1;
359 if (nStartPos > Text.getLength())
360 nStartPos = Text.getLength();
362 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
364 sal_uInt32 ch=0;
365 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
366 if (((numberOfChange % 2) == 0) != (ScriptType != getScriptClass(ch)))
367 numberOfChange--;
368 else if (nStartPos == 0) {
369 return -1;
372 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
375 sal_Int32 SAL_CALL BreakIteratorImpl::nextScript( const OUString& Text, sal_Int32 nStartPos,
376 sal_Int16 ScriptType )
379 if (nStartPos < 0)
380 nStartPos = 0;
381 sal_Int32 strLen = Text.getLength();
382 if (nStartPos >= strLen)
383 return -1;
385 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
387 sal_uInt32 ch=0;
388 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
389 sal_Int16 currentCharScriptType = getScriptClass(ch);
390 if ((numberOfChange == 1) ? (ScriptType == currentCharScriptType) :
391 (ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK))
392 numberOfChange--;
394 return numberOfChange == 0 ? nStartPos : -1;
397 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
398 const Locale& /*rLocale*/, sal_Int16 CharType )
400 if (CharType == CharType::ANY_CHAR) return 0;
401 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
402 if (CharType != static_cast<sal_Int16>(u_charType( Text.iterateCodePoints(&nStartPos, 0)))) return -1;
404 sal_Int32 nPos=nStartPos;
405 while(nStartPos > 0 && CharType == static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nPos, -1)))) { nStartPos=nPos; }
406 return nStartPos; // begin of char block is inclusive
409 sal_Int32 SAL_CALL BreakIteratorImpl::endOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
410 const Locale& /*rLocale*/, sal_Int16 CharType )
412 sal_Int32 strLen = Text.getLength();
414 if (CharType == CharType::ANY_CHAR) return strLen; // end of char block is exclusive
415 if (nStartPos < 0 || nStartPos >= strLen) return -1;
416 if (CharType != static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nStartPos, 0)))) return -1;
418 sal_uInt32 ch=0;
419 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen && CharType == static_cast<sal_Int16>(u_charType(ch))) {}
420 return nStartPos; // end of char block is exclusive
423 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharBlock( const OUString& Text, sal_Int32 nStartPos,
424 const Locale& /*rLocale*/, sal_Int16 CharType )
426 if (CharType == CharType::ANY_CHAR) return -1;
427 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
429 sal_Int16 numberOfChange = (CharType == static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nStartPos, 0)))) ? 2 : 1;
430 sal_Int32 strLen = Text.getLength();
432 sal_uInt32 ch=0;
433 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
434 if ((CharType != static_cast<sal_Int16>(u_charType(ch))) != (numberOfChange == 1))
435 numberOfChange--;
437 return numberOfChange == 0 ? nStartPos : -1;
440 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharBlock( const OUString& Text, sal_Int32 nStartPos,
441 const Locale& /*rLocale*/, sal_Int16 CharType )
443 if(CharType == CharType::ANY_CHAR) return -1;
444 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
446 sal_Int16 numberOfChange = (CharType == static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nStartPos, 0)))) ? 3 : 2;
448 sal_uInt32 ch=0;
449 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
450 if (((numberOfChange % 2) == 0) != (CharType != static_cast<sal_Int16>(u_charType(ch))))
451 numberOfChange--;
452 if (nStartPos == 0 && numberOfChange > 0) {
453 numberOfChange--;
454 if (numberOfChange == 0) return nStartPos;
457 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
461 sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/,
462 sal_Int32 /*nPos*/, const Locale& /*rLocale*/ )
464 return 0;
467 namespace
469 sal_Int16 getScriptClassByUAX24Script(sal_uInt32 currentChar)
471 int32_t script = u_getIntPropertyValue(currentChar, UCHAR_SCRIPT);
472 return unicode::getScriptClassFromUScriptCode(static_cast<UScriptCode>(script));
475 struct UBlock2Script
477 UBlockCode from;
478 UBlockCode to;
479 sal_Int16 script;
482 const UBlock2Script scriptList[] =
484 {UBLOCK_NO_BLOCK, UBLOCK_NO_BLOCK, ScriptType::WEAK},
485 {UBLOCK_BASIC_LATIN, UBLOCK_SPACING_MODIFIER_LETTERS, ScriptType::LATIN},
486 {UBLOCK_GREEK, UBLOCK_ARMENIAN, ScriptType::LATIN},
487 {UBLOCK_HEBREW, UBLOCK_MYANMAR, ScriptType::COMPLEX},
488 {UBLOCK_GEORGIAN, UBLOCK_GEORGIAN, ScriptType::LATIN},
489 {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO, ScriptType::ASIAN},
490 {UBLOCK_ETHIOPIC, UBLOCK_ETHIOPIC, ScriptType::COMPLEX},
491 {UBLOCK_CHEROKEE, UBLOCK_RUNIC, ScriptType::LATIN},
492 {UBLOCK_KHMER, UBLOCK_MONGOLIAN, ScriptType::COMPLEX},
493 {UBLOCK_LATIN_EXTENDED_ADDITIONAL, UBLOCK_GREEK_EXTENDED, ScriptType::LATIN},
494 {UBLOCK_NUMBER_FORMS, UBLOCK_NUMBER_FORMS, ScriptType::WEAK},
495 {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES, ScriptType::ASIAN},
496 {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, ScriptType::ASIAN},
497 {UBLOCK_ARABIC_PRESENTATION_FORMS_A, UBLOCK_ARABIC_PRESENTATION_FORMS_A, ScriptType::COMPLEX},
498 {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS, ScriptType::ASIAN},
499 {UBLOCK_ARABIC_PRESENTATION_FORMS_B, UBLOCK_ARABIC_PRESENTATION_FORMS_B, ScriptType::COMPLEX},
500 {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, ScriptType::ASIAN},
501 {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, ScriptType::ASIAN},
502 {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES, ScriptType::ASIAN},
503 {UBLOCK_LATIN_EXTENDED_C, UBLOCK_LATIN_EXTENDED_D, ScriptType::LATIN}
506 #define scriptListCount SAL_N_ELEMENTS(scriptList)
508 //always sets rScriptType
510 //returns true for characters historically explicitly assigned to
511 //latin/weak/asian
513 //returns false for characters that historically implicitly assigned to
514 //weak as unknown
515 bool getCompatibilityScriptClassByBlock(sal_uInt32 currentChar, sal_Int16 &rScriptType)
517 bool bKnown = true;
518 //handle specific characters always as weak:
519 // 0x01 - this breaks a word
520 // 0x02 - this can be inside a word
521 // 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
522 if( 0x01 == currentChar || 0x02 == currentChar || 0x20 == currentChar || 0xA0 == currentChar)
523 rScriptType = ScriptType::WEAK;
524 // Few Spacing Modifier Letters that can be Bopomofo tonal marks.
525 else if ( 0x2CA == currentChar || 0x2CB == currentChar || 0x2C7 == currentChar || 0x2D9 == currentChar )
526 rScriptType = ScriptType::WEAK;
527 // workaround for Coptic
528 else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar)
529 rScriptType = ScriptType::LATIN;
530 else
532 UBlockCode block=ublock_getCode(currentChar);
533 size_t i = 0;
534 while (i < scriptListCount)
536 if (block <= scriptList[i].to)
537 break;
538 ++i;
540 if (i < scriptListCount && block >= scriptList[i].from)
541 rScriptType = scriptList[i].script;
542 else
544 rScriptType = ScriptType::WEAK;
545 bKnown = false;
548 return bKnown;
552 sal_Int16 BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar)
554 static sal_uInt32 lastChar = 0;
555 static sal_Int16 nRet = ScriptType::WEAK;
557 if (currentChar != lastChar)
559 lastChar = currentChar;
561 if (!getCompatibilityScriptClassByBlock(currentChar, nRet))
562 nRet = getScriptClassByUAX24Script(currentChar);
565 return nRet;
568 bool BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString& aLocaleName)
570 // to share service between same Language but different Country code, like zh_CN and zh_TW
571 for (const lookupTableItem& listItem : lookupTable) {
572 if (aLocaleName == listItem.aLocale.Language) {
573 xBI = listItem.xBI;
574 return true;
578 #if !WITH_LOCALE_ALL && !WITH_LOCALE_ja
579 if (aLocaleName == "ja")
580 return false;
581 #endif
582 #if !WITH_LOCALE_ALL && !WITH_LOCALE_zh
583 if (aLocaleName == "zh" || aLocaleName == "zh_TW")
584 return false;
585 #endif
586 #if !WITH_LOCALE_ALL && !WITH_LOCALE_ko
587 if (aLocaleName == "ko")
588 return false;
589 #endif
590 #if !WITH_LOCALE_ALL && !WITH_LOCALE_th
591 if (aLocaleName == "th")
592 return false;
593 #endif
595 Reference < uno::XInterface > xI = m_xContext->getServiceManager()->createInstanceWithContext(
596 "com.sun.star.i18n.BreakIterator_" + aLocaleName, m_xContext);
598 if ( xI.is() ) {
599 xBI.set(xI, UNO_QUERY);
600 if (xBI.is()) {
601 lookupTable.emplace_back(Locale(aLocaleName, aLocaleName, aLocaleName), xBI);
602 return true;
605 return false;
608 Reference < XBreakIterator >
609 BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale& rLocale)
611 if (xBI.is() && rLocale == aLocale)
612 return xBI;
613 else if (m_xContext.is()) {
614 aLocale = rLocale;
616 for (const lookupTableItem& listItem : lookupTable) {
617 if (rLocale == listItem.aLocale)
619 xBI = listItem.xBI;
620 return xBI;
624 OUStringLiteral under(u"_");
626 sal_Int32 l = rLocale.Language.getLength();
627 sal_Int32 c = rLocale.Country.getLength();
628 sal_Int32 v = rLocale.Variant.getLength();
630 if ((l > 0 && c > 0 && v > 0 &&
631 // load service with name <base>_<lang>_<country>_<variant>
632 createLocaleSpecificBreakIterator(rLocale.Language + under +
633 rLocale.Country + under + rLocale.Variant)) ||
634 (l > 0 && c > 0 &&
635 // load service with name <base>_<lang>_<country>
636 createLocaleSpecificBreakIterator(rLocale.Language + under +
637 rLocale.Country)) ||
638 (l > 0 && c > 0 && rLocale.Language == "zh" &&
639 (rLocale.Country == "HK" ||
640 rLocale.Country == "MO" ) &&
641 // if the country code is HK or MO, one more step to try TW.
642 createLocaleSpecificBreakIterator(rLocale.Language + under +
643 "TW")) ||
644 (l > 0 &&
645 // load service with name <base>_<lang>
646 createLocaleSpecificBreakIterator(rLocale.Language)) ||
647 // load default service with name <base>_Unicode
648 createLocaleSpecificBreakIterator("Unicode")) {
649 lookupTable.emplace_back( aLocale, xBI );
650 return xBI;
653 throw RuntimeException("getLocaleSpecificBreakIterator: iterator not found");
656 OUString SAL_CALL
657 BreakIteratorImpl::getImplementationName()
659 return "com.sun.star.i18n.BreakIterator";
662 sal_Bool SAL_CALL
663 BreakIteratorImpl::supportsService(const OUString& rServiceName)
665 return cppu::supportsService(this, rServiceName);
668 Sequence< OUString > SAL_CALL
669 BreakIteratorImpl::getSupportedServiceNames()
671 return { "com.sun.star.i18n.BreakIterator" };
676 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
677 com_sun_star_i18n_BreakIterator_get_implementation(
678 css::uno::XComponentContext *context,
679 css::uno::Sequence<css::uno::Any> const &)
681 return cppu::acquire(new i18npool::BreakIteratorImpl(context));
684 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */