bump product version to 6.3.0.0.beta1
[LibreOffice.git] / i18npool / source / breakiterator / breakiteratorImpl.cxx
blob857aa21ee44967f33061f6660554fa3c8332b38b
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #include <config_locales.h>
21 #include <breakiteratorImpl.hxx>
22 #include <cppuhelper/supportsservice.hxx>
23 #include <unicode/uchar.h>
24 #include <i18nutil/unicode.hxx>
25 #include <rtl/ustrbuf.hxx>
27 #include <com/sun/star/i18n/CharType.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/WordType.hpp>
30 #include <com/sun/star/uno/XComponentContext.hpp>
32 using namespace ::com::sun::star;
33 using namespace ::com::sun::star::uno;
34 using namespace ::com::sun::star::i18n;
35 using namespace ::com::sun::star::lang;
37 namespace i18npool {
39 BreakIteratorImpl::BreakIteratorImpl( const Reference < XComponentContext >& rxContext ) : m_xContext( rxContext )
43 BreakIteratorImpl::BreakIteratorImpl()
47 BreakIteratorImpl::~BreakIteratorImpl()
51 #define LBI getLocaleSpecificBreakIterator(rLocale)
53 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharacters( const OUString& Text, sal_Int32 nStartPos,
54 const Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
56 if (nCount < 0) throw RuntimeException();
58 return LBI->nextCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
61 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharacters( const OUString& Text, sal_Int32 nStartPos,
62 const Locale& rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
64 if (nCount < 0) throw RuntimeException();
66 return LBI->previousCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
69 #define isZWSP(c) (ch == 0x200B)
71 static sal_Int32 skipSpace(const OUString& Text, sal_Int32 nPos, sal_Int32 len, sal_Int16 rWordType, bool bDirection)
73 sal_uInt32 ch=0;
74 sal_Int32 pos=nPos;
75 switch (rWordType) {
76 case WordType::ANYWORD_IGNOREWHITESPACES:
77 if (bDirection)
78 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos)) || isZWSP(ch))) nPos=pos;
79 else
80 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
81 break;
82 case WordType::DICTIONARY_WORD:
83 if (bDirection)
84 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos)) || isZWSP(ch) ||
85 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
86 else
87 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch) ||
88 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
89 break;
90 case WordType::WORD_COUNT:
91 if (bDirection)
92 while (nPos < len && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos)) || isZWSP(ch))) nPos=pos;
93 else
94 while (nPos > 0 && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
95 break;
97 return nPos;
100 Boundary SAL_CALL BreakIteratorImpl::nextWord( const OUString& Text, sal_Int32 nStartPos,
101 const Locale& rLocale, sal_Int16 rWordType )
103 sal_Int32 len = Text.getLength();
104 if( nStartPos < 0 || len == 0 )
105 result.endPos = result.startPos = 0;
106 else if (nStartPos >= len)
107 result.endPos = result.startPos = len;
108 else {
109 result = LBI->nextWord(Text, nStartPos, rLocale, rWordType);
111 nStartPos = skipSpace(Text, result.startPos, len, rWordType, true);
113 if ( nStartPos != result.startPos) {
114 if( nStartPos >= len )
115 result.startPos = result.endPos = len;
116 else {
117 result = LBI->getWordBoundary(Text, nStartPos, rLocale, rWordType, true);
118 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts
119 if (result.startPos < nStartPos) result.startPos = nStartPos;
123 return result;
126 static bool isCJK( const Locale& rLocale ) {
127 return rLocale.Language == "zh" || rLocale.Language == "ja" || rLocale.Language == "ko";
130 Boundary SAL_CALL BreakIteratorImpl::previousWord( const OUString& Text, sal_Int32 nStartPos,
131 const Locale& rLocale, sal_Int16 rWordType)
133 sal_Int32 len = Text.getLength();
134 if( nStartPos <= 0 || len == 0 ) {
135 result.endPos = result.startPos = 0;
136 return result;
137 } else if (nStartPos > len) {
138 result.endPos = result.startPos = len;
139 return result;
142 sal_Int32 nPos = skipSpace(Text, nStartPos, len, rWordType, false);
144 // if some spaces are skipped, and the script type is Asian with no CJK rLocale, we have to return
145 // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary.
146 result.startPos = nPos;
147 if (nPos != nStartPos && nPos > 0 && !isCJK(rLocale) && getScriptClass(Text.iterateCodePoints(&nPos, -1)) == ScriptType::ASIAN) {
148 result.endPos = -1;
149 return result;
152 return LBI->previousWord(Text, result.startPos, rLocale, rWordType);
156 Boundary SAL_CALL BreakIteratorImpl::getWordBoundary( const OUString& Text, sal_Int32 nPos, const Locale& rLocale,
157 sal_Int16 rWordType, sal_Bool bDirection )
159 sal_Int32 len = Text.getLength();
160 if( nPos < 0 || len == 0 )
161 result.endPos = result.startPos = 0;
162 else if (nPos > len)
163 result.endPos = result.startPos = len;
164 else {
165 sal_Int32 next, prev;
166 next = skipSpace(Text, nPos, len, rWordType, true);
167 prev = skipSpace(Text, nPos, len, rWordType, false);
168 if (prev == 0 && next == len) {
169 result.endPos = result.startPos = nPos;
170 } else if (prev == 0 && ! bDirection) {
171 result.endPos = result.startPos = 0;
172 } else if (next == len && bDirection) {
173 result.endPos = result.startPos = len;
174 } else {
175 if (next != prev) {
176 if (next == nPos && next != len)
177 bDirection = true;
178 else if (prev == nPos && prev != 0)
179 bDirection = false;
180 else
181 nPos = bDirection ? next : prev;
183 result = LBI->getWordBoundary(Text, nPos, rLocale, rWordType, bDirection);
186 return result;
189 sal_Bool SAL_CALL BreakIteratorImpl::isBeginWord( const OUString& Text, sal_Int32 nPos,
190 const Locale& rLocale, sal_Int16 rWordType )
192 sal_Int32 len = Text.getLength();
194 if (nPos < 0 || nPos >= len) return false;
196 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, true);
198 if (tmp != nPos) return false;
200 result = getWordBoundary(Text, nPos, rLocale, rWordType, true);
202 return result.startPos == nPos;
205 sal_Bool SAL_CALL BreakIteratorImpl::isEndWord( const OUString& Text, sal_Int32 nPos,
206 const Locale& rLocale, sal_Int16 rWordType )
208 sal_Int32 len = Text.getLength();
210 if (nPos <= 0 || nPos > len) return false;
212 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, false);
214 if (tmp != nPos) return false;
216 result = getWordBoundary(Text, nPos, rLocale, rWordType, false);
218 return result.endPos == nPos;
221 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
222 const Locale &rLocale )
224 if (nStartPos < 0 || nStartPos > Text.getLength())
225 return -1;
226 if (Text.isEmpty()) return 0;
227 return LBI->beginOfSentence(Text, nStartPos, rLocale);
230 sal_Int32 SAL_CALL BreakIteratorImpl::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
231 const Locale &rLocale )
233 if (nStartPos < 0 || nStartPos > Text.getLength())
234 return -1;
235 if (Text.isEmpty()) return 0;
236 return LBI->endOfSentence(Text, nStartPos, rLocale);
239 LineBreakResults SAL_CALL BreakIteratorImpl::getLineBreak( const OUString& Text, sal_Int32 nStartPos,
240 const Locale& rLocale, sal_Int32 nMinBreakPos, const LineBreakHyphenationOptions& hOptions,
241 const LineBreakUserOptions& bOptions )
243 return LBI->getLineBreak(Text, nStartPos, rLocale, nMinBreakPos, hOptions, bOptions);
246 sal_Int16 SAL_CALL BreakIteratorImpl::getScriptType( const OUString& Text, sal_Int32 nPos )
248 return (nPos < 0 || nPos >= Text.getLength()) ? ScriptType::WEAK :
249 getScriptClass(Text.iterateCodePoints(&nPos, 0));
253 /** Increments/decrements position first, then obtains character.
254 @return current position, may be -1 or text length if string was consumed.
256 static sal_Int32 iterateCodePoints(const OUString& Text, sal_Int32 &nStartPos, sal_Int32 inc, sal_uInt32& ch) {
257 sal_Int32 nLen = Text.getLength();
258 if (nStartPos + inc < 0 || nStartPos + inc >= nLen) {
259 ch = 0;
260 nStartPos = nStartPos + inc < 0 ? -1 : nLen;
261 } else {
262 ch = Text.iterateCodePoints(&nStartPos, inc);
263 // Fix for #i80436#.
264 // erAck: 2009-06-30T21:52+0200 This logic looks somewhat
265 // suspicious as if it cures a symptom.. anyway, had to add
266 // nStartPos < Text.getLength() to silence the (correct) assertion
267 // in rtl_uString_iterateCodePoints() if Text was one character
268 // (codepoint) only, made up of a surrogate pair.
269 //if (inc > 0 && nStartPos < Text.getLength())
270 // ch = Text.iterateCodePoints(&nStartPos, 0);
271 // With surrogates, nStartPos may actually point behind string
272 // now, even if inc is only +1
273 if (inc > 0)
274 ch = (nStartPos < nLen ? Text.iterateCodePoints(&nStartPos, 0) : 0);
276 return nStartPos;
280 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfScript( const OUString& Text,
281 sal_Int32 nStartPos, sal_Int16 ScriptType )
283 if (nStartPos < 0 || nStartPos >= Text.getLength())
284 return -1;
286 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
287 return -1;
289 if (nStartPos == 0) return 0;
290 sal_uInt32 ch=0;
291 while (iterateCodePoints(Text, nStartPos, -1, ch) >= 0 && ScriptType == getScriptClass(ch)) {
292 if (nStartPos == 0) return 0;
295 return iterateCodePoints(Text, nStartPos, 1, ch);
298 sal_Int32 SAL_CALL BreakIteratorImpl::endOfScript( const OUString& Text,
299 sal_Int32 nStartPos, sal_Int16 ScriptType )
301 if (nStartPos < 0 || nStartPos >= Text.getLength())
302 return -1;
304 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
305 return -1;
307 sal_Int32 strLen = Text.getLength();
308 sal_uInt32 ch=0;
309 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) {
310 sal_Int16 currentCharScriptType = getScriptClass(ch);
311 if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK)
312 break;
314 return nStartPos;
317 sal_Int32 SAL_CALL BreakIteratorImpl::previousScript( const OUString& Text,
318 sal_Int32 nStartPos, sal_Int16 ScriptType )
320 if (nStartPos < 0)
321 return -1;
322 if (nStartPos > Text.getLength())
323 nStartPos = Text.getLength();
325 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
327 sal_uInt32 ch=0;
328 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
329 if (((numberOfChange % 2) == 0) != (ScriptType != getScriptClass(ch)))
330 numberOfChange--;
331 else if (nStartPos == 0) {
332 return -1;
335 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
338 sal_Int32 SAL_CALL BreakIteratorImpl::nextScript( const OUString& Text, sal_Int32 nStartPos,
339 sal_Int16 ScriptType )
342 if (nStartPos < 0)
343 nStartPos = 0;
344 sal_Int32 strLen = Text.getLength();
345 if (nStartPos >= strLen)
346 return -1;
348 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
350 sal_uInt32 ch=0;
351 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
352 sal_Int16 currentCharScriptType = getScriptClass(ch);
353 if ((numberOfChange == 1) ? (ScriptType == currentCharScriptType) :
354 (ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK))
355 numberOfChange--;
357 return numberOfChange == 0 ? nStartPos : -1;
360 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
361 const Locale& /*rLocale*/, sal_Int16 CharType )
363 if (CharType == CharType::ANY_CHAR) return 0;
364 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
365 if (CharType != static_cast<sal_Int16>(u_charType( Text.iterateCodePoints(&nStartPos, 0)))) return -1;
367 sal_Int32 nPos=nStartPos;
368 while(nStartPos > 0 && CharType == static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nPos, -1)))) { nStartPos=nPos; }
369 return nStartPos; // begin of char block is inclusive
372 sal_Int32 SAL_CALL BreakIteratorImpl::endOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
373 const Locale& /*rLocale*/, sal_Int16 CharType )
375 sal_Int32 strLen = Text.getLength();
377 if (CharType == CharType::ANY_CHAR) return strLen; // end of char block is exclusive
378 if (nStartPos < 0 || nStartPos >= strLen) return -1;
379 if (CharType != static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nStartPos, 0)))) return -1;
381 sal_uInt32 ch=0;
382 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen && CharType == static_cast<sal_Int16>(u_charType(ch))) {}
383 return nStartPos; // end of char block is exclusive
386 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharBlock( const OUString& Text, sal_Int32 nStartPos,
387 const Locale& /*rLocale*/, sal_Int16 CharType )
389 if (CharType == CharType::ANY_CHAR) return -1;
390 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
392 sal_Int16 numberOfChange = (CharType == static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nStartPos, 0)))) ? 2 : 1;
393 sal_Int32 strLen = Text.getLength();
395 sal_uInt32 ch=0;
396 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
397 if ((CharType != static_cast<sal_Int16>(u_charType(ch))) != (numberOfChange == 1))
398 numberOfChange--;
400 return numberOfChange == 0 ? nStartPos : -1;
403 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharBlock( const OUString& Text, sal_Int32 nStartPos,
404 const Locale& /*rLocale*/, sal_Int16 CharType )
406 if(CharType == CharType::ANY_CHAR) return -1;
407 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
409 sal_Int16 numberOfChange = (CharType == static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nStartPos, 0)))) ? 3 : 2;
411 sal_uInt32 ch=0;
412 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
413 if (((numberOfChange % 2) == 0) != (CharType != static_cast<sal_Int16>(u_charType(ch))))
414 numberOfChange--;
415 if (nStartPos == 0 && numberOfChange > 0) {
416 numberOfChange--;
417 if (numberOfChange == 0) return nStartPos;
420 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
424 sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/,
425 sal_Int32 /*nPos*/, const Locale& /*rLocale*/ )
427 return 0;
430 namespace
432 sal_Int16 getScriptClassByUAX24Script(sal_uInt32 currentChar)
434 int32_t script = u_getIntPropertyValue(currentChar, UCHAR_SCRIPT);
435 return unicode::getScriptClassFromUScriptCode(static_cast<UScriptCode>(script));
438 struct UBlock2Script
440 UBlockCode from;
441 UBlockCode to;
442 sal_Int16 script;
445 static const UBlock2Script scriptList[] =
447 {UBLOCK_NO_BLOCK, UBLOCK_NO_BLOCK, ScriptType::WEAK},
448 {UBLOCK_BASIC_LATIN, UBLOCK_SPACING_MODIFIER_LETTERS, ScriptType::LATIN},
449 {UBLOCK_GREEK, UBLOCK_ARMENIAN, ScriptType::LATIN},
450 {UBLOCK_HEBREW, UBLOCK_MYANMAR, ScriptType::COMPLEX},
451 {UBLOCK_GEORGIAN, UBLOCK_GEORGIAN, ScriptType::LATIN},
452 {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO, ScriptType::ASIAN},
453 {UBLOCK_ETHIOPIC, UBLOCK_ETHIOPIC, ScriptType::COMPLEX},
454 {UBLOCK_CHEROKEE, UBLOCK_RUNIC, ScriptType::LATIN},
455 {UBLOCK_KHMER, UBLOCK_MONGOLIAN, ScriptType::COMPLEX},
456 {UBLOCK_LATIN_EXTENDED_ADDITIONAL, UBLOCK_GREEK_EXTENDED, ScriptType::LATIN},
457 {UBLOCK_NUMBER_FORMS, UBLOCK_NUMBER_FORMS, ScriptType::WEAK},
458 {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES, ScriptType::ASIAN},
459 {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, ScriptType::ASIAN},
460 {UBLOCK_ARABIC_PRESENTATION_FORMS_A, UBLOCK_ARABIC_PRESENTATION_FORMS_A, ScriptType::COMPLEX},
461 {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS, ScriptType::ASIAN},
462 {UBLOCK_ARABIC_PRESENTATION_FORMS_B, UBLOCK_ARABIC_PRESENTATION_FORMS_B, ScriptType::COMPLEX},
463 {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, ScriptType::ASIAN},
464 {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, ScriptType::ASIAN},
465 {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES, ScriptType::ASIAN},
466 {UBLOCK_LATIN_EXTENDED_C, UBLOCK_LATIN_EXTENDED_D, ScriptType::LATIN}
469 #define scriptListCount SAL_N_ELEMENTS(scriptList)
471 //always sets rScriptType
473 //returns true for characters historically explicitly assigned to
474 //latin/weak/asian
476 //returns false for characters that historically implicitly assigned to
477 //weak as unknown
478 bool getCompatibilityScriptClassByBlock(sal_uInt32 currentChar, sal_Int16 &rScriptType)
480 bool bKnown = true;
481 //handle specific characters always as weak:
482 // 0x01 - this breaks a word
483 // 0x02 - this can be inside a word
484 // 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
485 if( 0x01 == currentChar || 0x02 == currentChar || 0x20 == currentChar || 0xA0 == currentChar)
486 rScriptType = ScriptType::WEAK;
487 // Few Spacing Modifier Letters that can be Bopomofo tonal marks.
488 else if ( 0x2CA == currentChar || 0x2CB == currentChar || 0x2C7 == currentChar || 0x2D9 == currentChar )
489 rScriptType = ScriptType::WEAK;
490 // workaround for Coptic
491 else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar)
492 rScriptType = ScriptType::LATIN;
493 else
495 UBlockCode block=ublock_getCode(currentChar);
496 size_t i = 0;
497 while (i < scriptListCount)
499 if (block <= scriptList[i].to)
500 break;
501 ++i;
503 if (i < scriptListCount && block >= scriptList[i].from)
504 rScriptType = scriptList[i].script;
505 else
507 rScriptType = ScriptType::WEAK;
508 bKnown = false;
511 return bKnown;
515 sal_Int16 BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar)
517 static sal_uInt32 lastChar = 0;
518 static sal_Int16 nRet = ScriptType::WEAK;
520 if (currentChar != lastChar)
522 lastChar = currentChar;
524 if (!getCompatibilityScriptClassByBlock(currentChar, nRet))
525 nRet = getScriptClassByUAX24Script(currentChar);
528 return nRet;
531 bool BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString& aLocaleName)
533 // to share service between same Language but different Country code, like zh_CN and zh_TW
534 for (lookupTableItem& listItem : lookupTable) {
535 if (aLocaleName == listItem.aLocale.Language) {
536 xBI = listItem.xBI;
537 return true;
541 #if !WITH_LOCALE_ALL && !WITH_LOCALE_ja
542 if (aLocaleName == "ja")
543 return false;
544 #endif
545 #if !WITH_LOCALE_ALL && !WITH_LOCALE_zh
546 if (aLocaleName == "zh" || aLocaleName == "zh_TW")
547 return false;
548 #endif
549 #if !WITH_LOCALE_ALL && !WITH_LOCALE_ko
550 if (aLocaleName == "ko")
551 return false;
552 #endif
553 #if !WITH_LOCALE_ALL && !WITH_LOCALE_th
554 if (aLocaleName == "th")
555 return false;
556 #endif
558 Reference < uno::XInterface > xI = m_xContext->getServiceManager()->createInstanceWithContext(
559 "com.sun.star.i18n.BreakIterator_" + aLocaleName, m_xContext);
561 if ( xI.is() ) {
562 xBI.set(xI, UNO_QUERY);
563 if (xBI.is()) {
564 lookupTable.emplace_back(Locale(aLocaleName, aLocaleName, aLocaleName), xBI);
565 return true;
568 return false;
571 Reference < XBreakIterator >
572 BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale& rLocale)
574 if (xBI.is() && rLocale == aLocale)
575 return xBI;
576 else if (m_xContext.is()) {
577 aLocale = rLocale;
579 for (lookupTableItem& listItem : lookupTable) {
580 if (rLocale == listItem.aLocale)
581 return xBI = listItem.xBI;
584 sal_Unicode under = '_';
586 sal_Int32 l = rLocale.Language.getLength();
587 sal_Int32 c = rLocale.Country.getLength();
588 sal_Int32 v = rLocale.Variant.getLength();
589 OUStringBuffer aBuf(l+c+v+3);
591 if ((l > 0 && c > 0 && v > 0 &&
592 // load service with name <base>_<lang>_<country>_<variant>
593 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
594 rLocale.Country).append(under).append(rLocale.Variant).makeStringAndClear())) ||
595 (l > 0 && c > 0 &&
596 // load service with name <base>_<lang>_<country>
597 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
598 rLocale.Country).makeStringAndClear())) ||
599 (l > 0 && c > 0 && rLocale.Language == "zh" &&
600 (rLocale.Country == "HK" ||
601 rLocale.Country == "MO" ) &&
602 // if the country code is HK or MO, one more step to try TW.
603 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
604 "TW").makeStringAndClear())) ||
605 (l > 0 &&
606 // load service with name <base>_<lang>
607 createLocaleSpecificBreakIterator(rLocale.Language)) ||
608 // load default service with name <base>_Unicode
609 createLocaleSpecificBreakIterator("Unicode")) {
610 lookupTable.emplace_back( aLocale, xBI );
611 return xBI;
614 throw RuntimeException();
617 OUString SAL_CALL
618 BreakIteratorImpl::getImplementationName()
620 return OUString("com.sun.star.i18n.BreakIterator");
623 sal_Bool SAL_CALL
624 BreakIteratorImpl::supportsService(const OUString& rServiceName)
626 return cppu::supportsService(this, rServiceName);
629 Sequence< OUString > SAL_CALL
630 BreakIteratorImpl::getSupportedServiceNames()
632 Sequence< OUString > aRet { "com.sun.star.i18n.BreakIterator" };
633 return aRet;
638 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
639 com_sun_star_i18n_BreakIterator_get_implementation(
640 css::uno::XComponentContext *context,
641 css::uno::Sequence<css::uno::Any> const &)
643 return cppu::acquire(new i18npool::BreakIteratorImpl(context));
646 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */