1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: breakiteratorImpl.cxx,v $
10 * $Revision: 1.27.4.2 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_i18npool.hxx"
34 #include <breakiteratorImpl.hxx>
35 #include <unicode/uchar.h>
36 #include <rtl/ustrbuf.hxx>
38 using namespace ::com::sun::star::uno
;
39 using namespace ::com::sun::star::lang
;
40 using namespace ::rtl
;
42 namespace com
{ namespace sun
{ namespace star
{ namespace i18n
{
44 BreakIteratorImpl::BreakIteratorImpl( const Reference
< XMultiServiceFactory
>& rxMSF
) : xMSF( rxMSF
)
48 BreakIteratorImpl::BreakIteratorImpl()
52 BreakIteratorImpl::~BreakIteratorImpl()
55 for (size_t l
= 0; l
< lookupTable
.size(); l
++)
56 delete lookupTable
[l
];
// Shorthand for "the break iterator that matches rLocale".
// The expansion names `rLocale` literally, so the macro can only be used
// inside functions that have a variable or parameter with exactly that name.
60 #define LBI getLocaleSpecificBreakIterator(rLocale)
62 sal_Int32 SAL_CALL
BreakIteratorImpl::nextCharacters( const OUString
& Text
, sal_Int32 nStartPos
,
63 const Locale
&rLocale
, sal_Int16 nCharacterIteratorMode
, sal_Int32 nCount
, sal_Int32
& nDone
)
64 throw(RuntimeException
)
66 if (nCount
< 0) throw RuntimeException();
68 return LBI
->nextCharacters( Text
, nStartPos
, rLocale
, nCharacterIteratorMode
, nCount
, nDone
);
71 sal_Int32 SAL_CALL
BreakIteratorImpl::previousCharacters( const OUString
& Text
, sal_Int32 nStartPos
,
72 const Locale
& rLocale
, sal_Int16 nCharacterIteratorMode
, sal_Int32 nCount
, sal_Int32
& nDone
)
73 throw(RuntimeException
)
75 if (nCount
< 0) throw RuntimeException();
77 return LBI
->previousCharacters( Text
, nStartPos
, rLocale
, nCharacterIteratorMode
, nCount
, nDone
);
/** True if code point @a c is U+200B (zero width space).
    The argument is parenthesised and actually evaluated, so the macro works
    with any expression and not only where a variable named `ch` is in scope. */
#define isZWSP(c) ((c) == 0x200B)
82 static sal_Int32
skipSpace(const OUString
& Text
, sal_Int32 nPos
, sal_Int32 len
, sal_Int16 rWordType
, sal_Bool bDirection
)
87 case WordType::ANYWORD_IGNOREWHITESPACES
:
89 while (nPos
< len
&& (u_isWhitespace(ch
= Text
.iterateCodePoints(&pos
, 1)) || isZWSP(ch
))) nPos
=pos
;
91 while (nPos
> 0 && (u_isWhitespace(ch
= Text
.iterateCodePoints(&pos
, -1)) || isZWSP(ch
))) nPos
=pos
;
93 case WordType::DICTIONARY_WORD
:
95 while (nPos
< len
&& (u_isWhitespace(ch
= Text
.iterateCodePoints(&pos
, 1)) || isZWSP(ch
) ||
96 ! (ch
== 0x002E || u_isalnum(ch
)))) nPos
=pos
;
98 while (nPos
> 0 && (u_isWhitespace(ch
= Text
.iterateCodePoints(&pos
, -1)) || isZWSP(ch
) ||
99 ! (ch
== 0x002E || u_isalnum(ch
)))) nPos
=pos
;
101 case WordType::WORD_COUNT
:
103 while (nPos
< len
&& (u_isUWhiteSpace(ch
= Text
.iterateCodePoints(&pos
, 1)) || isZWSP(ch
))) nPos
=pos
;
105 while (nPos
> 0 && (u_isUWhiteSpace(ch
= Text
.iterateCodePoints(&pos
, -1)) || isZWSP(ch
))) nPos
=pos
;
111 Boundary SAL_CALL
BreakIteratorImpl::nextWord( const OUString
& Text
, sal_Int32 nStartPos
,
112 const Locale
& rLocale
, sal_Int16 rWordType
) throw(RuntimeException
)
114 sal_Int32 len
= Text
.getLength();
115 if( nStartPos
< 0 || len
== 0 )
116 result
.endPos
= result
.startPos
= 0;
117 else if (nStartPos
>= len
)
118 result
.endPos
= result
.startPos
= len
;
120 result
= LBI
->nextWord(Text
, nStartPos
, rLocale
, rWordType
);
122 nStartPos
= skipSpace(Text
, result
.startPos
, len
, rWordType
, sal_True
);
124 if ( nStartPos
!= result
.startPos
) {
125 if( nStartPos
>= len
)
126 result
.startPos
= result
.endPos
= len
;
128 result
= LBI
->getWordBoundary(Text
, nStartPos
, rLocale
, rWordType
, sal_True
);
129 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts
130 if (result
.startPos
< nStartPos
) result
.startPos
= nStartPos
;
137 static inline sal_Bool SAL_CALL
isCJK( const Locale
& rLocale
) {
138 return rLocale
.Language
.equalsAscii("zh") || rLocale
.Language
.equalsAscii("ja") || rLocale
.Language
.equalsAscii("ko");
141 Boundary SAL_CALL
BreakIteratorImpl::previousWord( const OUString
& Text
, sal_Int32 nStartPos
,
142 const Locale
& rLocale
, sal_Int16 rWordType
) throw(RuntimeException
)
144 sal_Int32 len
= Text
.getLength();
145 if( nStartPos
<= 0 || len
== 0 ) {
146 result
.endPos
= result
.startPos
= 0;
148 } else if (nStartPos
> len
) {
149 result
.endPos
= result
.startPos
= len
;
153 sal_Int32 nPos
= skipSpace(Text
, nStartPos
, len
, rWordType
, sal_False
);
155 // if some spaces are skiped, and the script type is Asian with no CJK rLocale, we have to return
156 // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary.
157 result
.startPos
= nPos
;
158 if (nPos
!= nStartPos
&& nPos
> 0 && !isCJK(rLocale
) && getScriptClass(Text
.iterateCodePoints(&nPos
, -1)) == ScriptType::ASIAN
) {
163 return LBI
->previousWord(Text
, result
.startPos
, rLocale
, rWordType
);
// Computes the boundary of the word at or next to nPos and stores it in `result`.
// Degenerate input (negative nPos, empty text) collapses the boundary to 0. Otherwise the
// separators around nPos are measured in both directions with skipSpace() before the
// locale-specific break iterator (LBI) is asked for the boundary.
// NOTE(review): the embedded source line numbers skip 169, 173, 175, 185-186, 191, 193 and
// 195-198 in this extract, so some conditions, braces and the final return are not visible
// here. The comments below describe only the statements that are visible.
167 Boundary SAL_CALL
BreakIteratorImpl::getWordBoundary( const OUString
& Text
, sal_Int32 nPos
, const Locale
& rLocale
,
168 sal_Int16 rWordType
, sal_Bool bDirection
) throw(RuntimeException
)
170 sal_Int32 len
= Text
.getLength();
// Negative position or empty text: empty boundary at 0.
171 if( nPos
< 0 || len
== 0 )
172 result
.endPos
= result
.startPos
= 0;
// NOTE(review): the condition guarding this assignment (line 173) is not visible; confirm
// whether it tests nPos > len or nPos >= len.
174 result
.endPos
= result
.startPos
= len
;
// next/prev: where a forward / backward separator skip starting at nPos ends.
176 sal_Int32 next
, prev
;
177 next
= skipSpace(Text
, nPos
, len
, rWordType
, sal_True
);
178 prev
= skipSpace(Text
, nPos
, len
, rWordType
, sal_False
);
// Skipping reached both ends of the text: empty boundary at nPos.
179 if (prev
== 0 && next
== len
) {
180 result
.endPos
= result
.startPos
= nPos
;
// Skipping backward reached the text start and the caller asked for backward: boundary at 0.
181 } else if (prev
== 0 && ! bDirection
) {
182 result
.endPos
= result
.startPos
= 0;
// Skipping forward reached the text end and the caller asked for forward: boundary at len.
183 } else if (next
== len
&& bDirection
) {
184 result
.endPos
= result
.startPos
= len
;
// The forward skip did not move (and nPos is not the end): search forward.
// NOTE(review): lines 185-186, which enclose this block, are not visible in this extract.
187 if (next
== nPos
&& next
!= len
)
188 bDirection
= sal_True
;
// The backward skip did not move (and nPos is not the start): search backward.
189 else if (prev
== nPos
&& prev
!= 0)
190 bDirection
= sal_False
;
// Move nPos to the end of the skipped separators in the chosen direction.
192 nPos
= bDirection
? next
: prev
;
// Let the locale-specific iterator compute the actual boundary.
194 result
= LBI
->getWordBoundary(Text
, nPos
, rLocale
, rWordType
, bDirection
);
200 sal_Bool SAL_CALL
BreakIteratorImpl::isBeginWord( const OUString
& Text
, sal_Int32 nPos
,
201 const Locale
& rLocale
, sal_Int16 rWordType
) throw(RuntimeException
)
203 sal_Int32 len
= Text
.getLength();
205 if (nPos
< 0 || nPos
>= len
) return sal_False
;
207 sal_Int32 tmp
= skipSpace(Text
, nPos
, len
, rWordType
, sal_True
);
209 if (tmp
!= nPos
) return sal_False
;
211 result
= getWordBoundary(Text
, nPos
, rLocale
, rWordType
, sal_True
);
213 return result
.startPos
== nPos
;
216 sal_Bool SAL_CALL
BreakIteratorImpl::isEndWord( const OUString
& Text
, sal_Int32 nPos
,
217 const Locale
& rLocale
, sal_Int16 rWordType
) throw(RuntimeException
)
219 sal_Int32 len
= Text
.getLength();
221 if (nPos
<= 0 || nPos
> len
) return sal_False
;
223 sal_Int32 tmp
= skipSpace(Text
, nPos
, len
, rWordType
, sal_False
);
225 if (tmp
!= nPos
) return sal_False
;
227 result
= getWordBoundary(Text
, nPos
, rLocale
, rWordType
, sal_False
);
229 return result
.endPos
== nPos
;
232 sal_Int32 SAL_CALL
BreakIteratorImpl::beginOfSentence( const OUString
& Text
, sal_Int32 nStartPos
,
233 const Locale
&rLocale
) throw(RuntimeException
)
235 if (nStartPos
< 0 || nStartPos
> Text
.getLength())
237 if (Text
.getLength() == 0) return 0;
238 return LBI
->beginOfSentence(Text
, nStartPos
, rLocale
);
241 sal_Int32 SAL_CALL
BreakIteratorImpl::endOfSentence( const OUString
& Text
, sal_Int32 nStartPos
,
242 const Locale
&rLocale
) throw(RuntimeException
)
244 if (nStartPos
< 0 || nStartPos
> Text
.getLength())
246 if (Text
.getLength() == 0) return 0;
247 return LBI
->endOfSentence(Text
, nStartPos
, rLocale
);
250 LineBreakResults SAL_CALL
BreakIteratorImpl::getLineBreak( const OUString
& Text
, sal_Int32 nStartPos
,
251 const Locale
& rLocale
, sal_Int32 nMinBreakPos
, const LineBreakHyphenationOptions
& hOptions
,
252 const LineBreakUserOptions
& bOptions
) throw(RuntimeException
)
254 return LBI
->getLineBreak(Text
, nStartPos
, rLocale
, nMinBreakPos
, hOptions
, bOptions
);
257 sal_Int16 SAL_CALL
BreakIteratorImpl::getScriptType( const OUString
& Text
, sal_Int32 nPos
)
258 throw(RuntimeException
)
260 return (nPos
< 0 || nPos
>= Text
.getLength()) ? ScriptType::WEAK
:
261 getScriptClass(Text
.iterateCodePoints(&nPos
, 0));
265 /** Increments/decrements position first, then obtains character.
266 @return current position, may be -1 or text length if string was consumed.
268 static sal_Int32 SAL_CALL
iterateCodePoints(const OUString
& Text
, sal_Int32
&nStartPos
, sal_Int32 inc
, sal_uInt32
& ch
) {
269 sal_Int32 nLen
= Text
.getLength();
270 if (nStartPos
+ inc
< 0 || nStartPos
+ inc
>= nLen
) {
272 nStartPos
= nStartPos
+ inc
< 0 ? -1 : nLen
;
274 ch
= Text
.iterateCodePoints(&nStartPos
, inc
);
276 // erAck: 2009-06-30T21:52+0200 This logic looks somewhat
277 // suspicious as if it cures a symptom.. anyway, had to add
278 // nStartPos < Text.getLength() to silence the (correct) assertion
279 // in rtl_uString_iterateCodePoints() if Text was one character
280 // (codepoint) only, made up of a surrogate pair.
281 //if (inc > 0 && nStartPos < Text.getLength())
282 // ch = Text.iterateCodePoints(&nStartPos, 0);
283 // With surrogates, nStartPos may actually point behind string
284 // now, even if inc is only +1
286 ch
= (nStartPos
< nLen
? Text
.iterateCodePoints(&nStartPos
, 0) : 0);
292 sal_Int32 SAL_CALL
BreakIteratorImpl::beginOfScript( const OUString
& Text
,
293 sal_Int32 nStartPos
, sal_Int16 ScriptType
) throw(RuntimeException
)
295 if (nStartPos
< 0 || nStartPos
>= Text
.getLength())
298 if(ScriptType
!= getScriptClass(Text
.iterateCodePoints(&nStartPos
, 0)))
301 if (nStartPos
== 0) return 0;
303 while (iterateCodePoints(Text
, nStartPos
, -1, ch
) >= 0 && ScriptType
== getScriptClass(ch
)) {
304 if (nStartPos
== 0) return 0;
307 return iterateCodePoints(Text
, nStartPos
, 1, ch
);
310 sal_Int32 SAL_CALL
BreakIteratorImpl::endOfScript( const OUString
& Text
,
311 sal_Int32 nStartPos
, sal_Int16 ScriptType
) throw(RuntimeException
)
313 if (nStartPos
< 0 || nStartPos
>= Text
.getLength())
316 if(ScriptType
!= getScriptClass(Text
.iterateCodePoints(&nStartPos
, 0)))
319 sal_Int32 strLen
= Text
.getLength();
321 while(iterateCodePoints(Text
, nStartPos
, 1, ch
) < strLen
) {
322 sal_Int16 currentCharScriptType
= getScriptClass(ch
);
323 if(ScriptType
!= currentCharScriptType
&& currentCharScriptType
!= ScriptType::WEAK
)
// Searches backwards from nStartPos for the start of the previous run of ScriptType.
// numberOfChange counts the script transitions still to be seen: 3 when the character at
// nStartPos already has ScriptType, otherwise 2. The result is the position one step forward
// from where the count reached 0, or -1 if it never did.
// NOTE(review): the embedded source line numbers skip 331-333, 336, 338-339, 342, 345-346 and
// 348-351 in this extract. The statements on those lines (among them the declaration of `ch`
// and the bodies of several of the `if`s below) are not visible here.
329 sal_Int32 SAL_CALL
BreakIteratorImpl::previousScript( const OUString
& Text
,
330 sal_Int32 nStartPos
, sal_Int16 ScriptType
) throw(RuntimeException
)
// Clamp a start beyond the end to the text length.
334 if (nStartPos
> Text
.getLength())
335 nStartPos
= Text
.getLength();
337 sal_Int16 numberOfChange
= (ScriptType
== getScriptClass(Text
.iterateCodePoints(&nStartPos
, 0))) ? 3 : 2;
// Step backwards one code point at a time until all transitions were seen or the text start
// is passed (the file-local iterateCodePoints() helper then returns a negative position).
340 while (numberOfChange
> 0 && iterateCodePoints(Text
, nStartPos
, -1, ch
) >= 0) {
// True when the count is odd and ch has ScriptType, or the count is even and ch has another
// script class. NOTE(review): the statement controlled by this test (line 342) is not visible.
341 if ((((numberOfChange
% 2) == 0) ^ (ScriptType
!= getScriptClass(ch
))))
// Special handling once the text start has been reached.
343 else if (nStartPos
== 0) {
344 if (numberOfChange
> 0)
347 Text
.iterateCodePoints(&nStartPos
, -1);
352 return numberOfChange
== 0 ? iterateCodePoints(Text
, nStartPos
, 1, ch
) : -1;
// Searches forwards from nStartPos for the start of the next run of ScriptType.
// numberOfChange counts the script transitions still to be seen: 2 when the character at
// nStartPos already has ScriptType (that run has to end first), otherwise 1.
// Returns nStartPos once the count reached 0, otherwise -1.
// NOTE(review): the embedded source line numbers skip 357-360, 363-364, 366-367 and 372-373 in
// this extract. The statements on those lines (among them the declaration of `ch`, the body of
// the `nStartPos > strLen` check and the body of the transition test) are not visible here.
355 sal_Int32 SAL_CALL
BreakIteratorImpl::nextScript( const OUString
& Text
, sal_Int32 nStartPos
,
356 sal_Int16 ScriptType
) throw(RuntimeException
)
361 sal_Int32 strLen
= Text
.getLength();
362 if (nStartPos
> strLen
)
365 sal_Int16 numberOfChange
= (ScriptType
== getScriptClass(Text
.iterateCodePoints(&nStartPos
, 0))) ? 2 : 1;
// Step forwards one code point at a time until all transitions were seen or the end is reached.
368 while (numberOfChange
> 0 && iterateCodePoints(Text
, nStartPos
, 1, ch
) < strLen
) {
369 sal_Int16 currentCharScriptType
= getScriptClass(ch
);
// With one transition left: true when ch has ScriptType (the wanted run begins).
// With two left: true when ch has a different, non-weak script class (the current run ends).
370 if ((numberOfChange
== 1) ? (ScriptType
== currentCharScriptType
) :
371 (ScriptType
!= currentCharScriptType
&& currentCharScriptType
!= ScriptType::WEAK
))
374 return numberOfChange
== 0 ? nStartPos
: -1;
377 sal_Int32 SAL_CALL
BreakIteratorImpl::beginOfCharBlock( const OUString
& Text
, sal_Int32 nStartPos
,
378 const Locale
& /*rLocale*/, sal_Int16 CharType
) throw(RuntimeException
)
380 if (CharType
== CharType::ANY_CHAR
) return 0;
381 if (nStartPos
< 0 || nStartPos
>= Text
.getLength()) return -1;
382 if (CharType
!= (sal_Int16
)u_charType( Text
.iterateCodePoints(&nStartPos
, 0))) return -1;
384 sal_Int32 nPos
=nStartPos
;
385 while(nStartPos
> 0 && CharType
== (sal_Int16
)u_charType(Text
.iterateCodePoints(&nPos
, -1))) { nStartPos
=nPos
; }
386 return nStartPos
; // begin of char block is inclusive
389 sal_Int32 SAL_CALL
BreakIteratorImpl::endOfCharBlock( const OUString
& Text
, sal_Int32 nStartPos
,
390 const Locale
& /*rLocale*/, sal_Int16 CharType
) throw(RuntimeException
)
392 sal_Int32 strLen
= Text
.getLength();
394 if (CharType
== CharType::ANY_CHAR
) return strLen
; // end of char block is exclusive
395 if (nStartPos
< 0 || nStartPos
>= strLen
) return -1;
396 if (CharType
!= (sal_Int16
)u_charType(Text
.iterateCodePoints(&nStartPos
, 0))) return -1;
399 while(iterateCodePoints(Text
, nStartPos
, 1, ch
) < strLen
&& CharType
== (sal_Int16
)u_charType(ch
)) {}
400 return nStartPos
; // end of char block is exclusive
403 sal_Int32 SAL_CALL
BreakIteratorImpl::nextCharBlock( const OUString
& Text
, sal_Int32 nStartPos
,
404 const Locale
& /*rLocale*/, sal_Int16 CharType
) throw(RuntimeException
)
406 if (CharType
== CharType::ANY_CHAR
) return -1;
407 if (nStartPos
< 0 || nStartPos
>= Text
.getLength()) return -1;
409 sal_Int16 numberOfChange
= (CharType
== (sal_Int16
)u_charType(Text
.iterateCodePoints(&nStartPos
, 0))) ? 2 : 1;
410 sal_Int32 strLen
= Text
.getLength();
413 while (numberOfChange
> 0 && iterateCodePoints(Text
, nStartPos
, 1, ch
) < strLen
) {
414 if ((CharType
!= (sal_Int16
)u_charType(ch
)) ^ (numberOfChange
== 1))
417 return numberOfChange
== 0 ? nStartPos
: -1;
420 sal_Int32 SAL_CALL
BreakIteratorImpl::previousCharBlock( const OUString
& Text
, sal_Int32 nStartPos
,
421 const Locale
& /*rLocale*/, sal_Int16 CharType
) throw(RuntimeException
)
423 if(CharType
== CharType::ANY_CHAR
) return -1;
424 if (nStartPos
< 0 || nStartPos
>= Text
.getLength()) return -1;
426 sal_Int16 numberOfChange
= (CharType
== (sal_Int16
)u_charType(Text
.iterateCodePoints(&nStartPos
, 0))) ? 3 : 2;
429 while (numberOfChange
> 0 && iterateCodePoints(Text
, nStartPos
, -1, ch
) >= 0) {
430 if (((numberOfChange
% 2) == 0) ^ (CharType
!= (sal_Int16
)u_charType(ch
)))
432 if (nStartPos
== 0 && numberOfChange
> 0) {
434 if (numberOfChange
== 0) return nStartPos
;
437 return numberOfChange
== 0 ? iterateCodePoints(Text
, nStartPos
, 1, ch
) : -1;
// Word type query. All three parameter names are commented out, so none of the arguments is
// used by the implementation.
// NOTE(review): the function body (lines 444-446 of the original file) is not visible in this
// extract, so the returned value cannot be stated here.
442 sal_Int16 SAL_CALL
BreakIteratorImpl::getWordType( const OUString
& /*Text*/,
443 sal_Int32
/*nPos*/, const Locale
& /*rLocale*/ ) throw(RuntimeException
)
454 static UBlock2Script scriptList
[] = {
455 {UBLOCK_NO_BLOCK
, UBLOCK_NO_BLOCK
, ScriptType::WEAK
},
456 {UBLOCK_BASIC_LATIN
, UBLOCK_ARMENIAN
, ScriptType::LATIN
},
457 {UBLOCK_HEBREW
, UBLOCK_MYANMAR
, ScriptType::COMPLEX
},
458 {UBLOCK_GEORGIAN
, UBLOCK_GEORGIAN
, ScriptType::LATIN
},
459 {UBLOCK_HANGUL_JAMO
, UBLOCK_HANGUL_JAMO
, ScriptType::ASIAN
},
460 {UBLOCK_ETHIOPIC
, UBLOCK_ETHIOPIC
, ScriptType::COMPLEX
},
461 {UBLOCK_CHEROKEE
, UBLOCK_RUNIC
, ScriptType::LATIN
},
462 {UBLOCK_KHMER
, UBLOCK_MONGOLIAN
, ScriptType::COMPLEX
},
463 {UBLOCK_LATIN_EXTENDED_ADDITIONAL
, UBLOCK_GREEK_EXTENDED
, ScriptType::LATIN
},
464 {UBLOCK_CJK_RADICALS_SUPPLEMENT
, UBLOCK_HANGUL_SYLLABLES
, ScriptType::ASIAN
},
465 {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS
, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS
, ScriptType::ASIAN
},
466 {UBLOCK_ARABIC_PRESENTATION_FORMS_A
, UBLOCK_ARABIC_PRESENTATION_FORMS_A
, ScriptType::COMPLEX
},
467 {UBLOCK_CJK_COMPATIBILITY_FORMS
, UBLOCK_CJK_COMPATIBILITY_FORMS
, ScriptType::ASIAN
},
468 {UBLOCK_ARABIC_PRESENTATION_FORMS_B
, UBLOCK_ARABIC_PRESENTATION_FORMS_B
, ScriptType::COMPLEX
},
469 {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS
, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS
, ScriptType::ASIAN
},
470 {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
, ScriptType::ASIAN
},
471 {UBLOCK_CJK_STROKES
, UBLOCK_CJK_STROKES
, ScriptType::ASIAN
},
472 {UBLOCK_LATIN_EXTENDED_C
, UBLOCK_LATIN_EXTENDED_D
, ScriptType::LATIN
}
/** Number of entries in scriptList.
    The expansion is parenthesised so that it stays a single operand in any
    surrounding expression (e.g. `x % scriptListCount`). */
#define scriptListCount (sizeof (scriptList) / sizeof (UBlock2Script))
477 sal_Int16
BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar
)
479 static sal_uInt32 lastChar
= 0;
480 static sal_Int16 nRet
= 0;
482 if (currentChar
!= lastChar
) {
483 lastChar
= currentChar
;
485 //JP 21.9.2001: handle specific characters - always as weak
486 // definition of 1 - this breaks a word
487 // 2 - this can be inside a word
488 // 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
489 if( 1 == currentChar
|| 2 == currentChar
|| 0x20 == currentChar
|| 0xA0 == currentChar
)
490 nRet
= ScriptType::WEAK
;
491 // workaround for Coptic
492 else if ( 0x2C80 <= currentChar
&& 0x2CE3 >= currentChar
)
493 nRet
= ScriptType::LATIN
;
495 UBlockCode block
=ublock_getCode(currentChar
);
497 for ( i
= 0; i
< scriptListCount
; i
++) {
498 if (block
<= scriptList
[i
].to
) break;
500 nRet
=(i
< scriptListCount
&& block
>= scriptList
[i
].from
) ? scriptList
[i
].script
: ScriptType::WEAK
;
506 static inline sal_Bool
operator == (const Locale
& l1
, const Locale
& l2
) {
507 return l1
.Language
== l2
.Language
&& l1
.Country
== l2
.Country
&& l1
.Variant
== l2
.Variant
;
// Tries to make xBI refer to the break iterator service named
// "com.sun.star.i18n.BreakIterator_" + aLocaleName.
// First lookupTable is searched for an entry whose locale Language equals aLocaleName; if the
// service has to be created, it is requested from xMSF, queried for XBreakIterator and appended
// to lookupTable under a Locale whose three fields are all set to aLocaleName.
// xMSF is dereferenced without a check in the visible code; the caller visible in this file,
// getLocaleSpecificBreakIterator(), tests xMSF.is() before calling.
// NOTE(review): the embedded source line numbers skip 511, 516-520, 523-524, 526 and 528-532 in
// this extract. The body of the cache-hit branch, the validity checks around the service
// creation and the return statements are not visible here.
510 sal_Bool SAL_CALL
BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString
& aLocaleName
) throw( RuntimeException
)
512 // to share service between same Language but different Country code, like zh_CN and zh_TW
513 for (size_t l
= 0; l
< lookupTable
.size(); l
++) {
514 lookupTableItem
*listItem
= lookupTable
[l
];
515 if (aLocaleName
== listItem
->aLocale
.Language
) {
// Not cached: ask the service factory for the implementation.
521 Reference
< uno::XInterface
> xI
= xMSF
->createInstance(
522 OUString::createFromAscii("com.sun.star.i18n.BreakIterator_") + aLocaleName
);
// Extract the XBreakIterator interface into xBI.
525 xI
->queryInterface( getCppuType((const Reference
< XBreakIterator
>*)0) ) >>= xBI
;
// Remember the new iterator; the table owns the item (it is deleted in the destructor).
527 lookupTable
.push_back(new lookupTableItem(Locale(aLocaleName
, aLocaleName
, aLocaleName
), xBI
));
// Returns the break iterator to use for rLocale.
// Visible lookup order: the current xBI if rLocale equals aLocale; then an exact locale match
// in lookupTable; then services loaded by name via createLocaleSpecificBreakIterator(), tried
// from most to least specific: <lang>_<country>_<variant>, <lang>_<country>, zh_TW for zh_HK and
// zh_MO, <lang>, and finally "Unicode". A successfully loaded iterator is appended to
// lookupTable. The function ends with `throw RuntimeException()`.
// NOTE(review): the embedded source line numbers skip 536, 538, 540-541, 546-547, 549, 554, 559,
// 569 and 575-577 in this extract. Some statements and the opening parts of two of the
// conditions below are not visible here.
534 Reference
< XBreakIterator
> SAL_CALL
535 BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale
& rLocale
) throw (RuntimeException
)
// Fast path: same locale as the one xBI was selected for.
537 if (xBI
.is() && rLocale
== aLocale
)
// Everything else needs the service factory.
539 else if (xMSF
.is()) {
// Exact match among the iterators recorded so far.
542 for (size_t i
= 0; i
< lookupTable
.size(); i
++) {
543 lookupTableItem
*listItem
= lookupTable
[i
];
544 if (rLocale
== listItem
->aLocale
)
545 return xBI
= listItem
->xBI
;
// Separator used when composing service name suffixes.
548 sal_Unicode under
= (sal_Unicode
)'_';
// Lengths of the three locale parts; a length of 0 means the part is absent.
550 sal_Int32 l
= rLocale
.Language
.getLength();
551 sal_Int32 c
= rLocale
.Country
.getLength();
552 sal_Int32 v
= rLocale
.Variant
.getLength();
// Room for all three parts plus separators.
553 OUStringBuffer
aBuf(l
+c
+v
+3);
// The || chain stops at the first name for which a service could be loaded.
555 if ((l
> 0 && c
> 0 && v
> 0 &&
556 // load service with name <base>_<lang>_<country>_<variant>
557 createLocaleSpecificBreakIterator(aBuf
.append(rLocale
.Language
).append(under
).append(
558 rLocale
.Country
).append(under
).append(rLocale
.Variant
).makeStringAndClear())) ||
560 // load service with name <base>_<lang>_<country>
561 createLocaleSpecificBreakIterator(aBuf
.append(rLocale
.Language
).append(under
).append(
562 rLocale
.Country
).makeStringAndClear())) ||
563 (l
> 0 && c
> 0 && rLocale
.Language
.compareToAscii("zh") == 0 &&
564 (rLocale
.Country
.compareToAscii("HK") == 0 ||
565 rLocale
.Country
.compareToAscii("MO") == 0) &&
566 // if the country code is HK or MO, one more step to try TW.
567 createLocaleSpecificBreakIterator(aBuf
.append(rLocale
.Language
).append(under
).appendAscii(
568 "TW").makeStringAndClear())) ||
570 // load service with name <base>_<lang>
571 createLocaleSpecificBreakIterator(rLocale
.Language
)) ||
572 // load default service with name <base>_Unicode
573 createLocaleSpecificBreakIterator(OUString::createFromAscii("Unicode"))) {
// Record the iterator under aLocale so the exact-match loop above finds it next time.
574 lookupTable
.push_back( new lookupTableItem(aLocale
, xBI
) );
578 throw RuntimeException();
// Name string used by getImplementationName(), supportsService() and
// getSupportedServiceNames() below.
581 const sal_Char cBreakIterator
[] = "com.sun.star.i18n.BreakIterator";
// Returns cBreakIterator converted to an OUString.
// NOTE(review): the line carrying the return type (583) and the braces are not visible in this
// extract.
584 BreakIteratorImpl::getImplementationName(void) throw( RuntimeException
)
586 return OUString::createFromAscii(cBreakIterator
);
// Tells whether rServiceName equals cBreakIterator: compareToAscii() returns 0 for equal
// strings, which the negation turns into a true result.
// NOTE(review): the line carrying the return type (589) and the braces are not visible in this
// extract.
590 BreakIteratorImpl::supportsService(const OUString
& rServiceName
) throw( RuntimeException
)
592 return !rServiceName
.compareToAscii(cBreakIterator
);
595 Sequence
< OUString
> SAL_CALL
596 BreakIteratorImpl::getSupportedServiceNames(void) throw( RuntimeException
)
598 Sequence
< OUString
> aRet(1);
599 aRet
[0] = OUString::createFromAscii(cBreakIterator
);