update dev300-m58
[ooovba.git] / i18npool / source / breakiterator / breakiteratorImpl.cxx
blobb66ebcfdb78b238d1f0a15ab5c9ba5f823b911d5
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: breakiteratorImpl.cxx,v $
10 * $Revision: 1.27.4.2 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_i18npool.hxx"
34 #include <breakiteratorImpl.hxx>
35 #include <unicode/uchar.h>
36 #include <rtl/ustrbuf.hxx>
38 using namespace ::com::sun::star::uno;
39 using namespace ::com::sun::star::lang;
40 using namespace ::rtl;
42 namespace com { namespace sun { namespace star { namespace i18n {
44 BreakIteratorImpl::BreakIteratorImpl( const Reference < XMultiServiceFactory >& rxMSF ) : xMSF( rxMSF )
48 BreakIteratorImpl::BreakIteratorImpl()
52 BreakIteratorImpl::~BreakIteratorImpl()
54 // Clear lookuptable
55 for (size_t l = 0; l < lookupTable.size(); l++)
56 delete lookupTable[l];
57 lookupTable.clear();
// LBI: shorthand for the break iterator that matches the caller's locale. It expands to a
// getLocaleSpecificBreakIterator() call, so a variable named `rLocale` must be in scope
// wherever the macro is used.
60 #define LBI getLocaleSpecificBreakIterator(rLocale)
62 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharacters( const OUString& Text, sal_Int32 nStartPos,
63 const Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
64 throw(RuntimeException)
66 if (nCount < 0) throw RuntimeException();
68 return LBI->nextCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
71 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharacters( const OUString& Text, sal_Int32 nStartPos,
72 const Locale& rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
73 throw(RuntimeException)
75 if (nCount < 0) throw RuntimeException();
77 return LBI->previousCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
80 #define isZWSP(c) (ch == 0x200B)
82 static sal_Int32 skipSpace(const OUString& Text, sal_Int32 nPos, sal_Int32 len, sal_Int16 rWordType, sal_Bool bDirection)
84 sal_uInt32 ch=0;
85 sal_Int32 pos=nPos;
86 switch (rWordType) {
87 case WordType::ANYWORD_IGNOREWHITESPACES:
88 if (bDirection)
89 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos;
90 else
91 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
92 break;
93 case WordType::DICTIONARY_WORD:
94 if (bDirection)
95 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch) ||
96 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
97 else
98 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch) ||
99 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
100 break;
101 case WordType::WORD_COUNT:
102 if (bDirection)
103 while (nPos < len && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos;
104 else
105 while (nPos > 0 && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
106 break;
108 return nPos;
111 Boundary SAL_CALL BreakIteratorImpl::nextWord( const OUString& Text, sal_Int32 nStartPos,
112 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException)
114 sal_Int32 len = Text.getLength();
115 if( nStartPos < 0 || len == 0 )
116 result.endPos = result.startPos = 0;
117 else if (nStartPos >= len)
118 result.endPos = result.startPos = len;
119 else {
120 result = LBI->nextWord(Text, nStartPos, rLocale, rWordType);
122 nStartPos = skipSpace(Text, result.startPos, len, rWordType, sal_True);
124 if ( nStartPos != result.startPos) {
125 if( nStartPos >= len )
126 result.startPos = result.endPos = len;
127 else {
128 result = LBI->getWordBoundary(Text, nStartPos, rLocale, rWordType, sal_True);
129 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts
130 if (result.startPos < nStartPos) result.startPos = nStartPos;
134 return result;
137 static inline sal_Bool SAL_CALL isCJK( const Locale& rLocale ) {
138 return rLocale.Language.equalsAscii("zh") || rLocale.Language.equalsAscii("ja") || rLocale.Language.equalsAscii("ko");
141 Boundary SAL_CALL BreakIteratorImpl::previousWord( const OUString& Text, sal_Int32 nStartPos,
142 const Locale& rLocale, sal_Int16 rWordType) throw(RuntimeException)
144 sal_Int32 len = Text.getLength();
145 if( nStartPos <= 0 || len == 0 ) {
146 result.endPos = result.startPos = 0;
147 return result;
148 } else if (nStartPos > len) {
149 result.endPos = result.startPos = len;
150 return result;
153 sal_Int32 nPos = skipSpace(Text, nStartPos, len, rWordType, sal_False);
155 // if some spaces are skiped, and the script type is Asian with no CJK rLocale, we have to return
156 // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary.
157 result.startPos = nPos;
158 if (nPos != nStartPos && nPos > 0 && !isCJK(rLocale) && getScriptClass(Text.iterateCodePoints(&nPos, -1)) == ScriptType::ASIAN) {
159 result.endPos = -1;
160 return result;
163 return LBI->previousWord(Text, result.startPos, rLocale, rWordType);
167 Boundary SAL_CALL BreakIteratorImpl::getWordBoundary( const OUString& Text, sal_Int32 nPos, const Locale& rLocale,
168 sal_Int16 rWordType, sal_Bool bDirection ) throw(RuntimeException)
170 sal_Int32 len = Text.getLength();
171 if( nPos < 0 || len == 0 )
172 result.endPos = result.startPos = 0;
173 else if (nPos > len)
174 result.endPos = result.startPos = len;
175 else {
176 sal_Int32 next, prev;
177 next = skipSpace(Text, nPos, len, rWordType, sal_True);
178 prev = skipSpace(Text, nPos, len, rWordType, sal_False);
179 if (prev == 0 && next == len) {
180 result.endPos = result.startPos = nPos;
181 } else if (prev == 0 && ! bDirection) {
182 result.endPos = result.startPos = 0;
183 } else if (next == len && bDirection) {
184 result.endPos = result.startPos = len;
185 } else {
186 if (next != prev) {
187 if (next == nPos && next != len)
188 bDirection = sal_True;
189 else if (prev == nPos && prev != 0)
190 bDirection = sal_False;
191 else
192 nPos = bDirection ? next : prev;
194 result = LBI->getWordBoundary(Text, nPos, rLocale, rWordType, bDirection);
197 return result;
200 sal_Bool SAL_CALL BreakIteratorImpl::isBeginWord( const OUString& Text, sal_Int32 nPos,
201 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException)
203 sal_Int32 len = Text.getLength();
205 if (nPos < 0 || nPos >= len) return sal_False;
207 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, sal_True);
209 if (tmp != nPos) return sal_False;
211 result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_True);
213 return result.startPos == nPos;
216 sal_Bool SAL_CALL BreakIteratorImpl::isEndWord( const OUString& Text, sal_Int32 nPos,
217 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException)
219 sal_Int32 len = Text.getLength();
221 if (nPos <= 0 || nPos > len) return sal_False;
223 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, sal_False);
225 if (tmp != nPos) return sal_False;
227 result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_False);
229 return result.endPos == nPos;
232 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
233 const Locale &rLocale ) throw(RuntimeException)
235 if (nStartPos < 0 || nStartPos > Text.getLength())
236 return -1;
237 if (Text.getLength() == 0) return 0;
238 return LBI->beginOfSentence(Text, nStartPos, rLocale);
241 sal_Int32 SAL_CALL BreakIteratorImpl::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
242 const Locale &rLocale ) throw(RuntimeException)
244 if (nStartPos < 0 || nStartPos > Text.getLength())
245 return -1;
246 if (Text.getLength() == 0) return 0;
247 return LBI->endOfSentence(Text, nStartPos, rLocale);
250 LineBreakResults SAL_CALL BreakIteratorImpl::getLineBreak( const OUString& Text, sal_Int32 nStartPos,
251 const Locale& rLocale, sal_Int32 nMinBreakPos, const LineBreakHyphenationOptions& hOptions,
252 const LineBreakUserOptions& bOptions ) throw(RuntimeException)
254 return LBI->getLineBreak(Text, nStartPos, rLocale, nMinBreakPos, hOptions, bOptions);
257 sal_Int16 SAL_CALL BreakIteratorImpl::getScriptType( const OUString& Text, sal_Int32 nPos )
258 throw(RuntimeException)
260 return (nPos < 0 || nPos >= Text.getLength()) ? ScriptType::WEAK :
261 getScriptClass(Text.iterateCodePoints(&nPos, 0));
264 static sal_Int32 SAL_CALL iterateCodePoints(const OUString& Text, sal_Int32 &nStartPos, sal_Int32 inc, sal_uInt32& ch) {
265 if (nStartPos + inc < 0 || nStartPos + inc >= Text.getLength()) {
266 ch = 0;
267 nStartPos = nStartPos + inc < 0 ? -1 : Text.getLength();
268 } else {
269 ch = Text.iterateCodePoints(&nStartPos, inc);
270 if (inc > 0) ch = Text.iterateCodePoints(&nStartPos, 0);
272 return nStartPos;
276 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfScript( const OUString& Text,
277 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException)
279 if (nStartPos < 0 || nStartPos >= Text.getLength())
280 return -1;
282 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
283 return -1;
285 if (nStartPos == 0) return 0;
286 sal_uInt32 ch=0;
287 while (iterateCodePoints(Text, nStartPos, -1, ch) >= 0 && ScriptType == getScriptClass(ch)) {
288 if (nStartPos == 0) return 0;
291 return iterateCodePoints(Text, nStartPos, 1, ch);
294 sal_Int32 SAL_CALL BreakIteratorImpl::endOfScript( const OUString& Text,
295 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException)
297 if (nStartPos < 0 || nStartPos >= Text.getLength())
298 return -1;
300 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
301 return -1;
303 sal_Int32 strLen = Text.getLength();
304 sal_uInt32 ch=0;
305 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) {
306 sal_Int16 currentCharScriptType = getScriptClass(ch);
307 if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK)
308 break;
310 return nStartPos;
313 sal_Int32 SAL_CALL BreakIteratorImpl::previousScript( const OUString& Text,
314 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException)
316 if (nStartPos < 0)
317 return -1;
318 if (nStartPos > Text.getLength())
319 nStartPos = Text.getLength();
321 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
323 sal_uInt32 ch=0;
324 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
325 if ((((numberOfChange % 2) == 0) ^ (ScriptType != getScriptClass(ch))))
326 numberOfChange--;
327 else if (nStartPos == 0) {
328 if (numberOfChange > 0)
329 numberOfChange--;
330 if (nStartPos > 0)
331 Text.iterateCodePoints(&nStartPos, -1);
332 else
333 return -1;
336 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
339 sal_Int32 SAL_CALL BreakIteratorImpl::nextScript( const OUString& Text, sal_Int32 nStartPos,
340 sal_Int16 ScriptType ) throw(RuntimeException)
343 if (nStartPos < 0)
344 nStartPos = 0;
345 sal_Int32 strLen = Text.getLength();
346 if (nStartPos > strLen)
347 return -1;
349 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
351 sal_uInt32 ch=0;
352 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
353 sal_Int16 currentCharScriptType = getScriptClass(ch);
354 if ((numberOfChange == 1) ? (ScriptType == currentCharScriptType) :
355 (ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK))
356 numberOfChange--;
358 return numberOfChange == 0 ? nStartPos : -1;
361 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
362 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException)
364 if (CharType == CharType::ANY_CHAR) return 0;
365 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
366 if (CharType != (sal_Int16)u_charType( Text.iterateCodePoints(&nStartPos, 0))) return -1;
368 sal_Int32 nPos=nStartPos;
369 while(nStartPos > 0 && CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nPos, -1))) { nStartPos=nPos; }
370 return nStartPos; // begin of char block is inclusive
373 sal_Int32 SAL_CALL BreakIteratorImpl::endOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
374 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException)
376 sal_Int32 strLen = Text.getLength();
378 if (CharType == CharType::ANY_CHAR) return strLen; // end of char block is exclusive
379 if (nStartPos < 0 || nStartPos >= strLen) return -1;
380 if (CharType != (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) return -1;
382 sal_uInt32 ch=0;
383 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen && CharType == (sal_Int16)u_charType(ch)) {}
384 return nStartPos; // end of char block is exclusive
387 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharBlock( const OUString& Text, sal_Int32 nStartPos,
388 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException)
390 if (CharType == CharType::ANY_CHAR) return -1;
391 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
393 sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
394 sal_Int32 strLen = Text.getLength();
396 sal_uInt32 ch=0;
397 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
398 if ((CharType != (sal_Int16)u_charType(ch)) ^ (numberOfChange == 1))
399 numberOfChange--;
401 return numberOfChange == 0 ? nStartPos : -1;
404 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharBlock( const OUString& Text, sal_Int32 nStartPos,
405 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException)
407 if(CharType == CharType::ANY_CHAR) return -1;
408 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
410 sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
412 sal_uInt32 ch=0;
413 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
414 if (((numberOfChange % 2) == 0) ^ (CharType != (sal_Int16)u_charType(ch)))
415 numberOfChange--;
416 if (nStartPos == 0 && numberOfChange > 0) {
417 numberOfChange--;
418 if (numberOfChange == 0) return nStartPos;
421 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
426 sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/,
427 sal_Int32 /*nPos*/, const Locale& /*rLocale*/ ) throw(RuntimeException)
429 return 0;
432 typedef struct {
433 UBlockCode from;
434 UBlockCode to;
435 sal_Int16 script;
436 } UBlock2Script;
438 static UBlock2Script scriptList[] = {
439 {UBLOCK_NO_BLOCK, UBLOCK_NO_BLOCK, ScriptType::WEAK},
440 {UBLOCK_BASIC_LATIN, UBLOCK_ARMENIAN, ScriptType::LATIN},
441 {UBLOCK_HEBREW, UBLOCK_MYANMAR, ScriptType::COMPLEX},
442 {UBLOCK_GEORGIAN, UBLOCK_GEORGIAN, ScriptType::LATIN},
443 {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO, ScriptType::ASIAN},
444 {UBLOCK_ETHIOPIC, UBLOCK_ETHIOPIC, ScriptType::COMPLEX},
445 {UBLOCK_CHEROKEE, UBLOCK_RUNIC, ScriptType::LATIN},
446 {UBLOCK_KHMER, UBLOCK_MONGOLIAN, ScriptType::COMPLEX},
447 {UBLOCK_LATIN_EXTENDED_ADDITIONAL, UBLOCK_GREEK_EXTENDED, ScriptType::LATIN},
448 {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES, ScriptType::ASIAN},
449 {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, ScriptType::ASIAN},
450 {UBLOCK_ARABIC_PRESENTATION_FORMS_A, UBLOCK_ARABIC_PRESENTATION_FORMS_A, ScriptType::COMPLEX},
451 {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS, ScriptType::ASIAN},
452 {UBLOCK_ARABIC_PRESENTATION_FORMS_B, UBLOCK_ARABIC_PRESENTATION_FORMS_B, ScriptType::COMPLEX},
453 {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, ScriptType::ASIAN},
454 {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, ScriptType::ASIAN},
455 {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES, ScriptType::ASIAN},
456 {UBLOCK_LATIN_EXTENDED_C, UBLOCK_LATIN_EXTENDED_D, ScriptType::LATIN}
459 #define scriptListCount sizeof (scriptList) / sizeof (UBlock2Script)
461 sal_Int16 BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar)
463 static sal_uInt32 lastChar = 0;
464 static sal_Int16 nRet = 0;
466 if (currentChar != lastChar) {
467 lastChar = currentChar;
469 //JP 21.9.2001: handle specific characters - always as weak
470 // definition of 1 - this breaks a word
471 // 2 - this can be inside a word
472 // 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
473 if( 1 == currentChar || 2 == currentChar || 0x20 == currentChar || 0xA0 == currentChar)
474 nRet = ScriptType::WEAK;
475 // workaround for Coptic
476 else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar)
477 nRet = ScriptType::LATIN;
478 else {
479 UBlockCode block=ublock_getCode(currentChar);
480 sal_uInt16 i;
481 for ( i = 0; i < scriptListCount; i++) {
482 if (block <= scriptList[i].to) break;
484 nRet=(i < scriptListCount && block >= scriptList[i].from) ? scriptList[i].script : ScriptType::WEAK;
487 return nRet;
490 static inline sal_Bool operator == (const Locale& l1, const Locale& l2) {
491 return l1.Language == l2.Language && l1.Country == l2.Country && l1.Variant == l2.Variant;
494 sal_Bool SAL_CALL BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString& aLocaleName) throw( RuntimeException )
496 // to share service between same Language but different Country code, like zh_CN and zh_TW
497 for (size_t l = 0; l < lookupTable.size(); l++) {
498 lookupTableItem *listItem = lookupTable[l];
499 if (aLocaleName == listItem->aLocale.Language) {
500 xBI = listItem->xBI;
501 return sal_True;
505 Reference < uno::XInterface > xI = xMSF->createInstance(
506 OUString::createFromAscii("com.sun.star.i18n.BreakIterator_") + aLocaleName);
508 if ( xI.is() ) {
509 xI->queryInterface( getCppuType((const Reference< XBreakIterator>*)0) ) >>= xBI;
510 if (xBI.is()) {
511 lookupTable.push_back(new lookupTableItem(Locale(aLocaleName, aLocaleName, aLocaleName), xBI));
512 return sal_True;
515 return sal_False;
518 Reference < XBreakIterator > SAL_CALL
519 BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale& rLocale) throw (RuntimeException)
521 if (xBI.is() && rLocale == aLocale)
522 return xBI;
523 else if (xMSF.is()) {
524 aLocale = rLocale;
526 for (size_t i = 0; i < lookupTable.size(); i++) {
527 lookupTableItem *listItem = lookupTable[i];
528 if (rLocale == listItem->aLocale)
529 return xBI = listItem->xBI;
532 sal_Unicode under = (sal_Unicode)'_';
534 sal_Int32 l = rLocale.Language.getLength();
535 sal_Int32 c = rLocale.Country.getLength();
536 sal_Int32 v = rLocale.Variant.getLength();
537 OUStringBuffer aBuf(l+c+v+3);
539 if ((l > 0 && c > 0 && v > 0 &&
540 // load service with name <base>_<lang>_<country>_<varian>
541 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
542 rLocale.Country).append(under).append(rLocale.Variant).makeStringAndClear())) ||
543 (l > 0 && c > 0 &&
544 // load service with name <base>_<lang>_<country>
545 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
546 rLocale.Country).makeStringAndClear())) ||
547 (l > 0 && c > 0 && rLocale.Language.compareToAscii("zh") == 0 &&
548 (rLocale.Country.compareToAscii("HK") == 0 ||
549 rLocale.Country.compareToAscii("MO") == 0) &&
550 // if the country code is HK or MO, one more step to try TW.
551 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).appendAscii(
552 "TW").makeStringAndClear())) ||
553 (l > 0 &&
554 // load service with name <base>_<lang>
555 createLocaleSpecificBreakIterator(rLocale.Language)) ||
556 // load default service with name <base>_Unicode
557 createLocaleSpecificBreakIterator(OUString::createFromAscii("Unicode"))) {
558 lookupTable.push_back( new lookupTableItem(aLocale, xBI) );
559 return xBI;
562 throw RuntimeException();
// Name string returned by getImplementationName() and getSupportedServiceNames(), and
// compared against in supportsService().
565 const sal_Char cBreakIterator[] = "com.sun.star.i18n.BreakIterator";
567 OUString SAL_CALL
568 BreakIteratorImpl::getImplementationName(void) throw( RuntimeException )
570 return OUString::createFromAscii(cBreakIterator);
573 sal_Bool SAL_CALL
574 BreakIteratorImpl::supportsService(const OUString& rServiceName) throw( RuntimeException )
576 return !rServiceName.compareToAscii(cBreakIterator);
579 Sequence< OUString > SAL_CALL
580 BreakIteratorImpl::getSupportedServiceNames(void) throw( RuntimeException )
582 Sequence< OUString > aRet(1);
583 aRet[0] = OUString::createFromAscii(cBreakIterator);
584 return aRet;
587 } } } }