merged tag ooo/OOO330_m14
[LibreOffice.git] / i18npool / source / breakiterator / breakiteratorImpl.cxx
blobe4e08d42df43aaa0f6e7ca64f6946816c8fb9da8
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_i18npool.hxx"
31 #include <breakiteratorImpl.hxx>
32 #include <unicode/uchar.h>
33 #include <rtl/ustrbuf.hxx>
35 using namespace ::com::sun::star::uno;
36 using namespace ::com::sun::star::lang;
37 using namespace ::rtl;
39 namespace com { namespace sun { namespace star { namespace i18n {
41 BreakIteratorImpl::BreakIteratorImpl( const Reference < XMultiServiceFactory >& rxMSF ) : xMSF( rxMSF )
45 BreakIteratorImpl::BreakIteratorImpl()
49 BreakIteratorImpl::~BreakIteratorImpl()
51 // Clear lookuptable
52 for (size_t l = 0; l < lookupTable.size(); l++)
53 delete lookupTable[l];
54 lookupTable.clear();
// Shorthand for the break iterator that serves the locale of the enclosing
// call. It expands to a lookup using a variable named rLocale, so it can only
// be used inside functions that have such a parameter.
#define LBI (getLocaleSpecificBreakIterator(rLocale))
59 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharacters( const OUString& Text, sal_Int32 nStartPos,
60 const Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
61 throw(RuntimeException)
63 if (nCount < 0) throw RuntimeException();
65 return LBI->nextCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
68 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharacters( const OUString& Text, sal_Int32 nStartPos,
69 const Locale& rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
70 throw(RuntimeException)
72 if (nCount < 0) throw RuntimeException();
74 return LBI->previousCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
// True if the code point c is U+200B ZERO WIDTH SPACE.
// The macro now tests its own argument; previously it ignored the parameter
// and silently read a variable named "ch" from the call site.
#define isZWSP(c) ((c) == 0x200B)
79 static sal_Int32 skipSpace(const OUString& Text, sal_Int32 nPos, sal_Int32 len, sal_Int16 rWordType, sal_Bool bDirection)
81 sal_uInt32 ch=0;
82 sal_Int32 pos=nPos;
83 switch (rWordType) {
84 case WordType::ANYWORD_IGNOREWHITESPACES:
85 if (bDirection)
86 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos;
87 else
88 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
89 break;
90 case WordType::DICTIONARY_WORD:
91 if (bDirection)
92 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch) ||
93 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
94 else
95 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch) ||
96 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
97 break;
98 case WordType::WORD_COUNT:
99 if (bDirection)
100 while (nPos < len && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos;
101 else
102 while (nPos > 0 && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
103 break;
105 return nPos;
108 Boundary SAL_CALL BreakIteratorImpl::nextWord( const OUString& Text, sal_Int32 nStartPos,
109 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException)
111 sal_Int32 len = Text.getLength();
112 if( nStartPos < 0 || len == 0 )
113 result.endPos = result.startPos = 0;
114 else if (nStartPos >= len)
115 result.endPos = result.startPos = len;
116 else {
117 result = LBI->nextWord(Text, nStartPos, rLocale, rWordType);
119 nStartPos = skipSpace(Text, result.startPos, len, rWordType, sal_True);
121 if ( nStartPos != result.startPos) {
122 if( nStartPos >= len )
123 result.startPos = result.endPos = len;
124 else {
125 result = LBI->getWordBoundary(Text, nStartPos, rLocale, rWordType, sal_True);
126 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts
127 if (result.startPos < nStartPos) result.startPos = nStartPos;
131 return result;
134 static inline sal_Bool SAL_CALL isCJK( const Locale& rLocale ) {
135 return rLocale.Language.equalsAscii("zh") || rLocale.Language.equalsAscii("ja") || rLocale.Language.equalsAscii("ko");
138 Boundary SAL_CALL BreakIteratorImpl::previousWord( const OUString& Text, sal_Int32 nStartPos,
139 const Locale& rLocale, sal_Int16 rWordType) throw(RuntimeException)
141 sal_Int32 len = Text.getLength();
142 if( nStartPos <= 0 || len == 0 ) {
143 result.endPos = result.startPos = 0;
144 return result;
145 } else if (nStartPos > len) {
146 result.endPos = result.startPos = len;
147 return result;
150 sal_Int32 nPos = skipSpace(Text, nStartPos, len, rWordType, sal_False);
152 // if some spaces are skiped, and the script type is Asian with no CJK rLocale, we have to return
153 // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary.
154 result.startPos = nPos;
155 if (nPos != nStartPos && nPos > 0 && !isCJK(rLocale) && getScriptClass(Text.iterateCodePoints(&nPos, -1)) == ScriptType::ASIAN) {
156 result.endPos = -1;
157 return result;
160 return LBI->previousWord(Text, result.startPos, rLocale, rWordType);
164 Boundary SAL_CALL BreakIteratorImpl::getWordBoundary( const OUString& Text, sal_Int32 nPos, const Locale& rLocale,
165 sal_Int16 rWordType, sal_Bool bDirection ) throw(RuntimeException)
167 sal_Int32 len = Text.getLength();
168 if( nPos < 0 || len == 0 )
169 result.endPos = result.startPos = 0;
170 else if (nPos > len)
171 result.endPos = result.startPos = len;
172 else {
173 sal_Int32 next, prev;
174 next = skipSpace(Text, nPos, len, rWordType, sal_True);
175 prev = skipSpace(Text, nPos, len, rWordType, sal_False);
176 if (prev == 0 && next == len) {
177 result.endPos = result.startPos = nPos;
178 } else if (prev == 0 && ! bDirection) {
179 result.endPos = result.startPos = 0;
180 } else if (next == len && bDirection) {
181 result.endPos = result.startPos = len;
182 } else {
183 if (next != prev) {
184 if (next == nPos && next != len)
185 bDirection = sal_True;
186 else if (prev == nPos && prev != 0)
187 bDirection = sal_False;
188 else
189 nPos = bDirection ? next : prev;
191 result = LBI->getWordBoundary(Text, nPos, rLocale, rWordType, bDirection);
194 return result;
197 sal_Bool SAL_CALL BreakIteratorImpl::isBeginWord( const OUString& Text, sal_Int32 nPos,
198 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException)
200 sal_Int32 len = Text.getLength();
202 if (nPos < 0 || nPos >= len) return sal_False;
204 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, sal_True);
206 if (tmp != nPos) return sal_False;
208 result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_True);
210 return result.startPos == nPos;
213 sal_Bool SAL_CALL BreakIteratorImpl::isEndWord( const OUString& Text, sal_Int32 nPos,
214 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException)
216 sal_Int32 len = Text.getLength();
218 if (nPos <= 0 || nPos > len) return sal_False;
220 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, sal_False);
222 if (tmp != nPos) return sal_False;
224 result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_False);
226 return result.endPos == nPos;
229 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
230 const Locale &rLocale ) throw(RuntimeException)
232 if (nStartPos < 0 || nStartPos > Text.getLength())
233 return -1;
234 if (Text.getLength() == 0) return 0;
235 return LBI->beginOfSentence(Text, nStartPos, rLocale);
238 sal_Int32 SAL_CALL BreakIteratorImpl::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
239 const Locale &rLocale ) throw(RuntimeException)
241 if (nStartPos < 0 || nStartPos > Text.getLength())
242 return -1;
243 if (Text.getLength() == 0) return 0;
244 return LBI->endOfSentence(Text, nStartPos, rLocale);
247 LineBreakResults SAL_CALL BreakIteratorImpl::getLineBreak( const OUString& Text, sal_Int32 nStartPos,
248 const Locale& rLocale, sal_Int32 nMinBreakPos, const LineBreakHyphenationOptions& hOptions,
249 const LineBreakUserOptions& bOptions ) throw(RuntimeException)
251 return LBI->getLineBreak(Text, nStartPos, rLocale, nMinBreakPos, hOptions, bOptions);
254 sal_Int16 SAL_CALL BreakIteratorImpl::getScriptType( const OUString& Text, sal_Int32 nPos )
255 throw(RuntimeException)
257 return (nPos < 0 || nPos >= Text.getLength()) ? ScriptType::WEAK :
258 getScriptClass(Text.iterateCodePoints(&nPos, 0));
262 /** Increments/decrements position first, then obtains character.
263 @return current position, may be -1 or text length if string was consumed.
265 static sal_Int32 SAL_CALL iterateCodePoints(const OUString& Text, sal_Int32 &nStartPos, sal_Int32 inc, sal_uInt32& ch) {
266 sal_Int32 nLen = Text.getLength();
267 if (nStartPos + inc < 0 || nStartPos + inc >= nLen) {
268 ch = 0;
269 nStartPos = nStartPos + inc < 0 ? -1 : nLen;
270 } else {
271 ch = Text.iterateCodePoints(&nStartPos, inc);
272 // Fix for #i80436#.
273 // erAck: 2009-06-30T21:52+0200 This logic looks somewhat
274 // suspicious as if it cures a symptom.. anyway, had to add
275 // nStartPos < Text.getLength() to silence the (correct) assertion
276 // in rtl_uString_iterateCodePoints() if Text was one character
277 // (codepoint) only, made up of a surrogate pair.
278 //if (inc > 0 && nStartPos < Text.getLength())
279 // ch = Text.iterateCodePoints(&nStartPos, 0);
280 // With surrogates, nStartPos may actually point behind string
281 // now, even if inc is only +1
282 if (inc > 0)
283 ch = (nStartPos < nLen ? Text.iterateCodePoints(&nStartPos, 0) : 0);
285 return nStartPos;
289 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfScript( const OUString& Text,
290 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException)
292 if (nStartPos < 0 || nStartPos >= Text.getLength())
293 return -1;
295 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
296 return -1;
298 if (nStartPos == 0) return 0;
299 sal_uInt32 ch=0;
300 while (iterateCodePoints(Text, nStartPos, -1, ch) >= 0 && ScriptType == getScriptClass(ch)) {
301 if (nStartPos == 0) return 0;
304 return iterateCodePoints(Text, nStartPos, 1, ch);
307 sal_Int32 SAL_CALL BreakIteratorImpl::endOfScript( const OUString& Text,
308 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException)
310 if (nStartPos < 0 || nStartPos >= Text.getLength())
311 return -1;
313 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
314 return -1;
316 sal_Int32 strLen = Text.getLength();
317 sal_uInt32 ch=0;
318 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) {
319 sal_Int16 currentCharScriptType = getScriptClass(ch);
320 if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK)
321 break;
323 return nStartPos;
326 sal_Int32 SAL_CALL BreakIteratorImpl::previousScript( const OUString& Text,
327 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException)
329 if (nStartPos < 0)
330 return -1;
331 if (nStartPos > Text.getLength())
332 nStartPos = Text.getLength();
334 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
336 sal_uInt32 ch=0;
337 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
338 if ((((numberOfChange % 2) == 0) ^ (ScriptType != getScriptClass(ch))))
339 numberOfChange--;
340 else if (nStartPos == 0) {
341 if (numberOfChange > 0)
342 numberOfChange--;
343 if (nStartPos > 0)
344 Text.iterateCodePoints(&nStartPos, -1);
345 else
346 return -1;
349 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
352 sal_Int32 SAL_CALL BreakIteratorImpl::nextScript( const OUString& Text, sal_Int32 nStartPos,
353 sal_Int16 ScriptType ) throw(RuntimeException)
356 if (nStartPos < 0)
357 nStartPos = 0;
358 sal_Int32 strLen = Text.getLength();
359 if (nStartPos > strLen)
360 return -1;
362 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
364 sal_uInt32 ch=0;
365 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
366 sal_Int16 currentCharScriptType = getScriptClass(ch);
367 if ((numberOfChange == 1) ? (ScriptType == currentCharScriptType) :
368 (ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK))
369 numberOfChange--;
371 return numberOfChange == 0 ? nStartPos : -1;
374 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
375 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException)
377 if (CharType == CharType::ANY_CHAR) return 0;
378 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
379 if (CharType != (sal_Int16)u_charType( Text.iterateCodePoints(&nStartPos, 0))) return -1;
381 sal_Int32 nPos=nStartPos;
382 while(nStartPos > 0 && CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nPos, -1))) { nStartPos=nPos; }
383 return nStartPos; // begin of char block is inclusive
386 sal_Int32 SAL_CALL BreakIteratorImpl::endOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
387 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException)
389 sal_Int32 strLen = Text.getLength();
391 if (CharType == CharType::ANY_CHAR) return strLen; // end of char block is exclusive
392 if (nStartPos < 0 || nStartPos >= strLen) return -1;
393 if (CharType != (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) return -1;
395 sal_uInt32 ch=0;
396 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen && CharType == (sal_Int16)u_charType(ch)) {}
397 return nStartPos; // end of char block is exclusive
400 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharBlock( const OUString& Text, sal_Int32 nStartPos,
401 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException)
403 if (CharType == CharType::ANY_CHAR) return -1;
404 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
406 sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
407 sal_Int32 strLen = Text.getLength();
409 sal_uInt32 ch=0;
410 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
411 if ((CharType != (sal_Int16)u_charType(ch)) ^ (numberOfChange == 1))
412 numberOfChange--;
414 return numberOfChange == 0 ? nStartPos : -1;
417 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharBlock( const OUString& Text, sal_Int32 nStartPos,
418 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException)
420 if(CharType == CharType::ANY_CHAR) return -1;
421 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
423 sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
425 sal_uInt32 ch=0;
426 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
427 if (((numberOfChange % 2) == 0) ^ (CharType != (sal_Int16)u_charType(ch)))
428 numberOfChange--;
429 if (nStartPos == 0 && numberOfChange > 0) {
430 numberOfChange--;
431 if (numberOfChange == 0) return nStartPos;
434 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
439 sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/,
440 sal_Int32 /*nPos*/, const Locale& /*rLocale*/ ) throw(RuntimeException)
442 return 0;
445 typedef struct {
446 UBlockCode from;
447 UBlockCode to;
448 sal_Int16 script;
449 } UBlock2Script;
451 static UBlock2Script scriptList[] = {
452 {UBLOCK_NO_BLOCK, UBLOCK_NO_BLOCK, ScriptType::WEAK},
453 {UBLOCK_BASIC_LATIN, UBLOCK_ARMENIAN, ScriptType::LATIN},
454 {UBLOCK_HEBREW, UBLOCK_MYANMAR, ScriptType::COMPLEX},
455 {UBLOCK_GEORGIAN, UBLOCK_GEORGIAN, ScriptType::LATIN},
456 {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO, ScriptType::ASIAN},
457 {UBLOCK_ETHIOPIC, UBLOCK_ETHIOPIC, ScriptType::COMPLEX},
458 {UBLOCK_CHEROKEE, UBLOCK_RUNIC, ScriptType::LATIN},
459 {UBLOCK_KHMER, UBLOCK_MONGOLIAN, ScriptType::COMPLEX},
460 {UBLOCK_LATIN_EXTENDED_ADDITIONAL, UBLOCK_GREEK_EXTENDED, ScriptType::LATIN},
461 {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES, ScriptType::ASIAN},
462 {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, ScriptType::ASIAN},
463 {UBLOCK_ARABIC_PRESENTATION_FORMS_A, UBLOCK_ARABIC_PRESENTATION_FORMS_A, ScriptType::COMPLEX},
464 {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS, ScriptType::ASIAN},
465 {UBLOCK_ARABIC_PRESENTATION_FORMS_B, UBLOCK_ARABIC_PRESENTATION_FORMS_B, ScriptType::COMPLEX},
466 {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, ScriptType::ASIAN},
467 {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, ScriptType::ASIAN},
468 {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES, ScriptType::ASIAN},
469 {UBLOCK_LATIN_EXTENDED_C, UBLOCK_LATIN_EXTENDED_D, ScriptType::LATIN}
472 #define scriptListCount sizeof (scriptList) / sizeof (UBlock2Script)
474 sal_Int16 BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar)
476 static sal_uInt32 lastChar = 0;
477 static sal_Int16 nRet = 0;
479 if (currentChar != lastChar) {
480 lastChar = currentChar;
482 //JP 21.9.2001: handle specific characters - always as weak
483 // definition of 1 - this breaks a word
484 // 2 - this can be inside a word
485 // 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
486 if( 1 == currentChar || 2 == currentChar || 0x20 == currentChar || 0xA0 == currentChar)
487 nRet = ScriptType::WEAK;
488 // workaround for Coptic
489 else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar)
490 nRet = ScriptType::LATIN;
491 else {
492 UBlockCode block=ublock_getCode(currentChar);
493 sal_uInt16 i;
494 for ( i = 0; i < scriptListCount; i++) {
495 if (block <= scriptList[i].to) break;
497 nRet=(i < scriptListCount && block >= scriptList[i].from) ? scriptList[i].script : ScriptType::WEAK;
500 return nRet;
503 static inline sal_Bool operator == (const Locale& l1, const Locale& l2) {
504 return l1.Language == l2.Language && l1.Country == l2.Country && l1.Variant == l2.Variant;
507 sal_Bool SAL_CALL BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString& aLocaleName) throw( RuntimeException )
509 // to share service between same Language but different Country code, like zh_CN and zh_TW
510 for (size_t l = 0; l < lookupTable.size(); l++) {
511 lookupTableItem *listItem = lookupTable[l];
512 if (aLocaleName == listItem->aLocale.Language) {
513 xBI = listItem->xBI;
514 return sal_True;
518 Reference < uno::XInterface > xI = xMSF->createInstance(
519 OUString::createFromAscii("com.sun.star.i18n.BreakIterator_") + aLocaleName);
521 if ( xI.is() ) {
522 xI->queryInterface( getCppuType((const Reference< XBreakIterator>*)0) ) >>= xBI;
523 if (xBI.is()) {
524 lookupTable.push_back(new lookupTableItem(Locale(aLocaleName, aLocaleName, aLocaleName), xBI));
525 return sal_True;
528 return sal_False;
531 Reference < XBreakIterator > SAL_CALL
532 BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale& rLocale) throw (RuntimeException)
534 if (xBI.is() && rLocale == aLocale)
535 return xBI;
536 else if (xMSF.is()) {
537 aLocale = rLocale;
539 for (size_t i = 0; i < lookupTable.size(); i++) {
540 lookupTableItem *listItem = lookupTable[i];
541 if (rLocale == listItem->aLocale)
542 return xBI = listItem->xBI;
545 sal_Unicode under = (sal_Unicode)'_';
547 sal_Int32 l = rLocale.Language.getLength();
548 sal_Int32 c = rLocale.Country.getLength();
549 sal_Int32 v = rLocale.Variant.getLength();
550 OUStringBuffer aBuf(l+c+v+3);
552 if ((l > 0 && c > 0 && v > 0 &&
553 // load service with name <base>_<lang>_<country>_<varian>
554 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
555 rLocale.Country).append(under).append(rLocale.Variant).makeStringAndClear())) ||
556 (l > 0 && c > 0 &&
557 // load service with name <base>_<lang>_<country>
558 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
559 rLocale.Country).makeStringAndClear())) ||
560 (l > 0 && c > 0 && rLocale.Language.compareToAscii("zh") == 0 &&
561 (rLocale.Country.compareToAscii("HK") == 0 ||
562 rLocale.Country.compareToAscii("MO") == 0) &&
563 // if the country code is HK or MO, one more step to try TW.
564 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).appendAscii(
565 "TW").makeStringAndClear())) ||
566 (l > 0 &&
567 // load service with name <base>_<lang>
568 createLocaleSpecificBreakIterator(rLocale.Language)) ||
569 // load default service with name <base>_Unicode
570 createLocaleSpecificBreakIterator(OUString::createFromAscii("Unicode"))) {
571 lookupTable.push_back( new lookupTableItem(aLocale, xBI) );
572 return xBI;
575 throw RuntimeException();
578 const sal_Char cBreakIterator[] = "com.sun.star.i18n.BreakIterator";
580 OUString SAL_CALL
581 BreakIteratorImpl::getImplementationName(void) throw( RuntimeException )
583 return OUString::createFromAscii(cBreakIterator);
586 sal_Bool SAL_CALL
587 BreakIteratorImpl::supportsService(const OUString& rServiceName) throw( RuntimeException )
589 return !rServiceName.compareToAscii(cBreakIterator);
592 Sequence< OUString > SAL_CALL
593 BreakIteratorImpl::getSupportedServiceNames(void) throw( RuntimeException )
595 Sequence< OUString > aRet(1);
596 aRet[0] = OUString::createFromAscii(cBreakIterator);
597 return aRet;
600 } } } }