Version 5.2.6.1, tag libreoffice-5.2.6.1
[LibreOffice.git] / i18npool / source / breakiterator / breakiteratorImpl.cxx
blob33ac5864a79c20a8003b1b1b283d9c7c38f479e6
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <breakiteratorImpl.hxx>
21 #include <cppuhelper/supportsservice.hxx>
22 #include <unicode/uchar.h>
23 #include <i18nutil/unicode.hxx>
24 #include <rtl/ustrbuf.hxx>
26 using namespace ::com::sun::star::uno;
27 using namespace ::com::sun::star::lang;
29 namespace com { namespace sun { namespace star { namespace i18n {
31 BreakIteratorImpl::BreakIteratorImpl( const Reference < XComponentContext >& rxContext ) : m_xContext( rxContext )
35 BreakIteratorImpl::BreakIteratorImpl()
39 BreakIteratorImpl::~BreakIteratorImpl()
41 // Clear lookuptable
42 for (lookupTableItem* p : lookupTable)
43 delete p;
44 lookupTable.clear();
// Shorthand for the locale-specific break iterator. The expansion refers to
// the identifier `rLocale`, so it can only be used where a variable of that
// name is in scope.
47 #define LBI getLocaleSpecificBreakIterator(rLocale)
49 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharacters( const OUString& Text, sal_Int32 nStartPos,
50 const Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
51 throw(RuntimeException, std::exception)
53 if (nCount < 0) throw RuntimeException();
55 return LBI->nextCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
58 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharacters( const OUString& Text, sal_Int32 nStartPos,
59 const Locale& rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
60 throw(RuntimeException, std::exception)
62 if (nCount < 0) throw RuntimeException();
64 return LBI->previousCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
67 #define isZWSP(c) (ch == 0x200B)
69 static sal_Int32 skipSpace(const OUString& Text, sal_Int32 nPos, sal_Int32 len, sal_Int16 rWordType, bool bDirection)
71 sal_uInt32 ch=0;
72 sal_Int32 pos=nPos;
73 switch (rWordType) {
74 case WordType::ANYWORD_IGNOREWHITESPACES:
75 if (bDirection)
76 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos)) || isZWSP(ch))) nPos=pos;
77 else
78 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
79 break;
80 case WordType::DICTIONARY_WORD:
81 if (bDirection)
82 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos)) || isZWSP(ch) ||
83 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
84 else
85 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch) ||
86 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
87 break;
88 case WordType::WORD_COUNT:
89 if (bDirection)
90 while (nPos < len && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos)) || isZWSP(ch))) nPos=pos;
91 else
92 while (nPos > 0 && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
93 break;
95 return nPos;
98 Boundary SAL_CALL BreakIteratorImpl::nextWord( const OUString& Text, sal_Int32 nStartPos,
99 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException, std::exception)
101 sal_Int32 len = Text.getLength();
102 if( nStartPos < 0 || len == 0 )
103 result.endPos = result.startPos = 0;
104 else if (nStartPos >= len)
105 result.endPos = result.startPos = len;
106 else {
107 result = LBI->nextWord(Text, nStartPos, rLocale, rWordType);
109 nStartPos = skipSpace(Text, result.startPos, len, rWordType, true);
111 if ( nStartPos != result.startPos) {
112 if( nStartPos >= len )
113 result.startPos = result.endPos = len;
114 else {
115 result = LBI->getWordBoundary(Text, nStartPos, rLocale, rWordType, true);
116 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts
117 if (result.startPos < nStartPos) result.startPos = nStartPos;
121 return result;
124 static inline bool SAL_CALL isCJK( const Locale& rLocale ) {
125 return rLocale.Language == "zh" || rLocale.Language == "ja" || rLocale.Language == "ko";
128 Boundary SAL_CALL BreakIteratorImpl::previousWord( const OUString& Text, sal_Int32 nStartPos,
129 const Locale& rLocale, sal_Int16 rWordType) throw(RuntimeException, std::exception)
131 sal_Int32 len = Text.getLength();
132 if( nStartPos <= 0 || len == 0 ) {
133 result.endPos = result.startPos = 0;
134 return result;
135 } else if (nStartPos > len) {
136 result.endPos = result.startPos = len;
137 return result;
140 sal_Int32 nPos = skipSpace(Text, nStartPos, len, rWordType, false);
142 // if some spaces are skipped, and the script type is Asian with no CJK rLocale, we have to return
143 // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary.
144 result.startPos = nPos;
145 if (nPos != nStartPos && nPos > 0 && !isCJK(rLocale) && getScriptClass(Text.iterateCodePoints(&nPos, -1)) == ScriptType::ASIAN) {
146 result.endPos = -1;
147 return result;
150 return LBI->previousWord(Text, result.startPos, rLocale, rWordType);
154 Boundary SAL_CALL BreakIteratorImpl::getWordBoundary( const OUString& Text, sal_Int32 nPos, const Locale& rLocale,
155 sal_Int16 rWordType, sal_Bool bDirection ) throw(RuntimeException, std::exception)
157 sal_Int32 len = Text.getLength();
158 if( nPos < 0 || len == 0 )
159 result.endPos = result.startPos = 0;
160 else if (nPos > len)
161 result.endPos = result.startPos = len;
162 else {
163 sal_Int32 next, prev;
164 next = skipSpace(Text, nPos, len, rWordType, true);
165 prev = skipSpace(Text, nPos, len, rWordType, false);
166 if (prev == 0 && next == len) {
167 result.endPos = result.startPos = nPos;
168 } else if (prev == 0 && ! bDirection) {
169 result.endPos = result.startPos = 0;
170 } else if (next == len && bDirection) {
171 result.endPos = result.startPos = len;
172 } else {
173 if (next != prev) {
174 if (next == nPos && next != len)
175 bDirection = true;
176 else if (prev == nPos && prev != 0)
177 bDirection = false;
178 else
179 nPos = bDirection ? next : prev;
181 result = LBI->getWordBoundary(Text, nPos, rLocale, rWordType, bDirection);
184 return result;
187 sal_Bool SAL_CALL BreakIteratorImpl::isBeginWord( const OUString& Text, sal_Int32 nPos,
188 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException, std::exception)
190 sal_Int32 len = Text.getLength();
192 if (nPos < 0 || nPos >= len) return false;
194 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, true);
196 if (tmp != nPos) return false;
198 result = getWordBoundary(Text, nPos, rLocale, rWordType, true);
200 return result.startPos == nPos;
203 sal_Bool SAL_CALL BreakIteratorImpl::isEndWord( const OUString& Text, sal_Int32 nPos,
204 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException, std::exception)
206 sal_Int32 len = Text.getLength();
208 if (nPos <= 0 || nPos > len) return false;
210 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, false);
212 if (tmp != nPos) return false;
214 result = getWordBoundary(Text, nPos, rLocale, rWordType, false);
216 return result.endPos == nPos;
219 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
220 const Locale &rLocale ) throw(RuntimeException, std::exception)
222 if (nStartPos < 0 || nStartPos > Text.getLength())
223 return -1;
224 if (Text.isEmpty()) return 0;
225 return LBI->beginOfSentence(Text, nStartPos, rLocale);
228 sal_Int32 SAL_CALL BreakIteratorImpl::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
229 const Locale &rLocale ) throw(RuntimeException, std::exception)
231 if (nStartPos < 0 || nStartPos > Text.getLength())
232 return -1;
233 if (Text.isEmpty()) return 0;
234 return LBI->endOfSentence(Text, nStartPos, rLocale);
237 LineBreakResults SAL_CALL BreakIteratorImpl::getLineBreak( const OUString& Text, sal_Int32 nStartPos,
238 const Locale& rLocale, sal_Int32 nMinBreakPos, const LineBreakHyphenationOptions& hOptions,
239 const LineBreakUserOptions& bOptions ) throw(RuntimeException, std::exception)
241 return LBI->getLineBreak(Text, nStartPos, rLocale, nMinBreakPos, hOptions, bOptions);
244 sal_Int16 SAL_CALL BreakIteratorImpl::getScriptType( const OUString& Text, sal_Int32 nPos )
245 throw(RuntimeException, std::exception)
247 return (nPos < 0 || nPos >= Text.getLength()) ? ScriptType::WEAK :
248 getScriptClass(Text.iterateCodePoints(&nPos, 0));
252 /** Increments/decrements position first, then obtains character.
253 @return current position, may be -1 or text length if string was consumed.
255 static sal_Int32 SAL_CALL iterateCodePoints(const OUString& Text, sal_Int32 &nStartPos, sal_Int32 inc, sal_uInt32& ch) {
256 sal_Int32 nLen = Text.getLength();
257 if (nStartPos + inc < 0 || nStartPos + inc >= nLen) {
258 ch = 0;
259 nStartPos = nStartPos + inc < 0 ? -1 : nLen;
260 } else {
261 ch = Text.iterateCodePoints(&nStartPos, inc);
262 // Fix for #i80436#.
263 // erAck: 2009-06-30T21:52+0200 This logic looks somewhat
264 // suspicious as if it cures a symptom.. anyway, had to add
265 // nStartPos < Text.getLength() to silence the (correct) assertion
266 // in rtl_uString_iterateCodePoints() if Text was one character
267 // (codepoint) only, made up of a surrogate pair.
268 //if (inc > 0 && nStartPos < Text.getLength())
269 // ch = Text.iterateCodePoints(&nStartPos, 0);
270 // With surrogates, nStartPos may actually point behind string
271 // now, even if inc is only +1
272 if (inc > 0)
273 ch = (nStartPos < nLen ? Text.iterateCodePoints(&nStartPos, 0) : 0);
275 return nStartPos;
279 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfScript( const OUString& Text,
280 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException, std::exception)
282 if (nStartPos < 0 || nStartPos >= Text.getLength())
283 return -1;
285 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
286 return -1;
288 if (nStartPos == 0) return 0;
289 sal_uInt32 ch=0;
290 while (iterateCodePoints(Text, nStartPos, -1, ch) >= 0 && ScriptType == getScriptClass(ch)) {
291 if (nStartPos == 0) return 0;
294 return iterateCodePoints(Text, nStartPos, 1, ch);
297 sal_Int32 SAL_CALL BreakIteratorImpl::endOfScript( const OUString& Text,
298 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException, std::exception)
300 if (nStartPos < 0 || nStartPos >= Text.getLength())
301 return -1;
303 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
304 return -1;
306 sal_Int32 strLen = Text.getLength();
307 sal_uInt32 ch=0;
308 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) {
309 sal_Int16 currentCharScriptType = getScriptClass(ch);
310 if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK)
311 break;
313 return nStartPos;
316 sal_Int32 SAL_CALL BreakIteratorImpl::previousScript( const OUString& Text,
317 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException, std::exception)
319 if (nStartPos < 0)
320 return -1;
321 if (nStartPos > Text.getLength())
322 nStartPos = Text.getLength();
324 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
326 sal_uInt32 ch=0;
327 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
328 if ((((numberOfChange % 2) == 0) != (ScriptType != getScriptClass(ch))))
329 numberOfChange--;
330 else if (nStartPos == 0) {
331 return -1;
334 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
337 sal_Int32 SAL_CALL BreakIteratorImpl::nextScript( const OUString& Text, sal_Int32 nStartPos,
338 sal_Int16 ScriptType ) throw(RuntimeException, std::exception)
341 if (nStartPos < 0)
342 nStartPos = 0;
343 sal_Int32 strLen = Text.getLength();
344 if (nStartPos >= strLen)
345 return -1;
347 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
349 sal_uInt32 ch=0;
350 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
351 sal_Int16 currentCharScriptType = getScriptClass(ch);
352 if ((numberOfChange == 1) ? (ScriptType == currentCharScriptType) :
353 (ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK))
354 numberOfChange--;
356 return numberOfChange == 0 ? nStartPos : -1;
359 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
360 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException, std::exception)
362 if (CharType == CharType::ANY_CHAR) return 0;
363 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
364 if (CharType != (sal_Int16)u_charType( Text.iterateCodePoints(&nStartPos, 0))) return -1;
366 sal_Int32 nPos=nStartPos;
367 while(nStartPos > 0 && CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nPos, -1))) { nStartPos=nPos; }
368 return nStartPos; // begin of char block is inclusive
371 sal_Int32 SAL_CALL BreakIteratorImpl::endOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
372 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException, std::exception)
374 sal_Int32 strLen = Text.getLength();
376 if (CharType == CharType::ANY_CHAR) return strLen; // end of char block is exclusive
377 if (nStartPos < 0 || nStartPos >= strLen) return -1;
378 if (CharType != (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) return -1;
380 sal_uInt32 ch=0;
381 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen && CharType == (sal_Int16)u_charType(ch)) {}
382 return nStartPos; // end of char block is exclusive
385 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharBlock( const OUString& Text, sal_Int32 nStartPos,
386 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException, std::exception)
388 if (CharType == CharType::ANY_CHAR) return -1;
389 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
391 sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
392 sal_Int32 strLen = Text.getLength();
394 sal_uInt32 ch=0;
395 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
396 if ((CharType != (sal_Int16)u_charType(ch)) != (numberOfChange == 1))
397 numberOfChange--;
399 return numberOfChange == 0 ? nStartPos : -1;
402 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharBlock( const OUString& Text, sal_Int32 nStartPos,
403 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException, std::exception)
405 if(CharType == CharType::ANY_CHAR) return -1;
406 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
408 sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
410 sal_uInt32 ch=0;
411 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
412 if (((numberOfChange % 2) == 0) != (CharType != (sal_Int16)u_charType(ch)))
413 numberOfChange--;
414 if (nStartPos == 0 && numberOfChange > 0) {
415 numberOfChange--;
416 if (numberOfChange == 0) return nStartPos;
419 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
423 sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/,
424 sal_Int32 /*nPos*/, const Locale& /*rLocale*/ ) throw(RuntimeException, std::exception)
426 return 0;
429 namespace
431 sal_Int16 getScriptClassByUAX24Script(sal_uInt32 currentChar)
433 int32_t script = u_getIntPropertyValue(currentChar, UCHAR_SCRIPT);
434 return unicode::getScriptClassFromUScriptCode(static_cast<UScriptCode>(script));
437 struct UBlock2Script
439 UBlockCode from;
440 UBlockCode to;
441 sal_Int16 script;
444 static const UBlock2Script scriptList[] =
446 {UBLOCK_NO_BLOCK, UBLOCK_NO_BLOCK, ScriptType::WEAK},
447 {UBLOCK_BASIC_LATIN, UBLOCK_ARMENIAN, ScriptType::LATIN},
448 {UBLOCK_HEBREW, UBLOCK_MYANMAR, ScriptType::COMPLEX},
449 {UBLOCK_GEORGIAN, UBLOCK_GEORGIAN, ScriptType::LATIN},
450 {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO, ScriptType::ASIAN},
451 {UBLOCK_ETHIOPIC, UBLOCK_ETHIOPIC, ScriptType::COMPLEX},
452 {UBLOCK_CHEROKEE, UBLOCK_RUNIC, ScriptType::LATIN},
453 {UBLOCK_KHMER, UBLOCK_MONGOLIAN, ScriptType::COMPLEX},
454 {UBLOCK_LATIN_EXTENDED_ADDITIONAL, UBLOCK_GREEK_EXTENDED, ScriptType::LATIN},
455 {UBLOCK_NUMBER_FORMS, UBLOCK_NUMBER_FORMS, ScriptType::WEAK},
456 {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES, ScriptType::ASIAN},
457 {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, ScriptType::ASIAN},
458 {UBLOCK_ARABIC_PRESENTATION_FORMS_A, UBLOCK_ARABIC_PRESENTATION_FORMS_A, ScriptType::COMPLEX},
459 {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS, ScriptType::ASIAN},
460 {UBLOCK_ARABIC_PRESENTATION_FORMS_B, UBLOCK_ARABIC_PRESENTATION_FORMS_B, ScriptType::COMPLEX},
461 {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, ScriptType::ASIAN},
462 {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, ScriptType::ASIAN},
463 {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES, ScriptType::ASIAN},
464 {UBLOCK_LATIN_EXTENDED_C, UBLOCK_LATIN_EXTENDED_D, ScriptType::LATIN}
467 #define scriptListCount SAL_N_ELEMENTS(scriptList)
469 //always sets rScriptType
471 //returns true for characters historically explicitly assigned to
472 //latin/weak/asian
474 //returns false for characters that historically implicitly assigned to
475 //weak as unknown
476 bool getCompatibilityScriptClassByBlock(sal_uInt32 currentChar, sal_Int16 &rScriptType)
478 bool bKnown = true;
479 //handle specific characters always as weak:
480 // 0x01 - this breaks a word
481 // 0x02 - this can be inside a word
482 // 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
483 if( 0x01 == currentChar || 0x02 == currentChar || 0x20 == currentChar || 0xA0 == currentChar)
484 rScriptType = ScriptType::WEAK;
485 // workaround for Coptic
486 else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar)
487 rScriptType = ScriptType::LATIN;
488 else
490 UBlockCode block=ublock_getCode(currentChar);
491 size_t i = 0;
492 while (i < scriptListCount)
494 if (block <= scriptList[i].to)
495 break;
496 ++i;
498 if (i < scriptListCount && block >= scriptList[i].from)
499 rScriptType = scriptList[i].script;
500 else
502 rScriptType = ScriptType::WEAK;
503 bKnown = false;
506 return bKnown;
510 sal_Int16 BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar)
512 static sal_uInt32 lastChar = 0;
513 static sal_Int16 nRet = 0;
515 if (currentChar != lastChar)
517 lastChar = currentChar;
519 if (!getCompatibilityScriptClassByBlock(currentChar, nRet))
520 nRet = getScriptClassByUAX24Script(currentChar);
523 return nRet;
526 bool SAL_CALL BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString& aLocaleName) throw( RuntimeException )
528 // to share service between same Language but different Country code, like zh_CN and zh_TW
529 for (lookupTableItem* listItem : lookupTable) {
530 if (aLocaleName == listItem->aLocale.Language) {
531 xBI = listItem->xBI;
532 return true;
536 Reference < uno::XInterface > xI = m_xContext->getServiceManager()->createInstanceWithContext(
537 "com.sun.star.i18n.BreakIterator_" + aLocaleName, m_xContext);
539 if ( xI.is() ) {
540 xBI.set(xI, UNO_QUERY);
541 if (xBI.is()) {
542 lookupTable.push_back(new lookupTableItem(Locale(aLocaleName, aLocaleName, aLocaleName), xBI));
543 return true;
546 return false;
549 Reference < XBreakIterator > SAL_CALL
550 BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale& rLocale) throw (RuntimeException)
552 if (xBI.is() && rLocale == aLocale)
553 return xBI;
554 else if (m_xContext.is()) {
555 aLocale = rLocale;
557 for (lookupTableItem* listItem : lookupTable) {
558 if (rLocale == listItem->aLocale)
559 return xBI = listItem->xBI;
562 sal_Unicode under = (sal_Unicode)'_';
564 sal_Int32 l = rLocale.Language.getLength();
565 sal_Int32 c = rLocale.Country.getLength();
566 sal_Int32 v = rLocale.Variant.getLength();
567 OUStringBuffer aBuf(l+c+v+3);
569 if ((l > 0 && c > 0 && v > 0 &&
570 // load service with name <base>_<lang>_<country>_<variant>
571 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
572 rLocale.Country).append(under).append(rLocale.Variant).makeStringAndClear())) ||
573 (l > 0 && c > 0 &&
574 // load service with name <base>_<lang>_<country>
575 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
576 rLocale.Country).makeStringAndClear())) ||
577 (l > 0 && c > 0 && rLocale.Language == "zh" &&
578 (rLocale.Country == "HK" ||
579 rLocale.Country == "MO" ) &&
580 // if the country code is HK or MO, one more step to try TW.
581 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
582 "TW").makeStringAndClear())) ||
583 (l > 0 &&
584 // load service with name <base>_<lang>
585 createLocaleSpecificBreakIterator(rLocale.Language)) ||
586 // load default service with name <base>_Unicode
587 createLocaleSpecificBreakIterator("Unicode")) {
588 lookupTable.push_back( new lookupTableItem(aLocale, xBI) );
589 return xBI;
592 throw RuntimeException();
595 OUString SAL_CALL
596 BreakIteratorImpl::getImplementationName() throw( RuntimeException, std::exception )
598 return OUString("com.sun.star.i18n.BreakIterator");
601 sal_Bool SAL_CALL
602 BreakIteratorImpl::supportsService(const OUString& rServiceName) throw( RuntimeException, std::exception )
604 return cppu::supportsService(this, rServiceName);
607 Sequence< OUString > SAL_CALL
608 BreakIteratorImpl::getSupportedServiceNames() throw( RuntimeException, std::exception )
610 Sequence< OUString > aRet { "com.sun.star.i18n.BreakIterator" };
611 return aRet;
614 } } } }
616 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * SAL_CALL
617 com_sun_star_i18n_BreakIterator_get_implementation(
618 css::uno::XComponentContext *context,
619 css::uno::Sequence<css::uno::Any> const &)
621 return cppu::acquire(new css::i18n::BreakIteratorImpl(context));
624 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */