/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
21 #include <breakiteratorImpl.hxx>
22 #include <unicode/uchar.h>
23 #include <i18nutil/unicode.hxx>
24 #include <rtl/ustrbuf.hxx>
26 using namespace ::com::sun::star::uno
;
27 using namespace ::com::sun::star::lang
;
28 using namespace ::rtl
;
30 namespace com
{ namespace sun
{ namespace star
{ namespace i18n
{
// Constructor taking a UNO component context; the context is stored in
// m_xContext through the member initialiser.
// NOTE(review): the constructor body is not visible in this extract.
32 BreakIteratorImpl::BreakIteratorImpl( const Reference
< XComponentContext
>& rxContext
) : m_xContext( rxContext
)
// Default constructor (no component context is supplied).
// NOTE(review): the constructor body is not visible in this extract.
36 BreakIteratorImpl::BreakIteratorImpl()
// Destructor: walks lookupTable by index and deletes every element it holds.
// NOTE(review): braces (and possibly further statements) are missing from
// this extract.
40 BreakIteratorImpl::~BreakIteratorImpl()
43 for (size_t l
= 0; l
< lookupTable
.size(); l
++)
44 delete lookupTable
[l
];
// Shorthand used by the forwarding methods below: expands to a call to
// getLocaleSpecificBreakIterator(rLocale), so it can only be used where a
// variable named rLocale is in scope.
48 #define LBI getLocaleSpecificBreakIterator(rLocale)
50 sal_Int32 SAL_CALL
BreakIteratorImpl::nextCharacters( const OUString
& Text
, sal_Int32 nStartPos
,
51 const Locale
&rLocale
, sal_Int16 nCharacterIteratorMode
, sal_Int32 nCount
, sal_Int32
& nDone
)
52 throw(RuntimeException
)
54 if (nCount
< 0) throw RuntimeException();
56 return LBI
->nextCharacters( Text
, nStartPos
, rLocale
, nCharacterIteratorMode
, nCount
, nDone
);
59 sal_Int32 SAL_CALL
BreakIteratorImpl::previousCharacters( const OUString
& Text
, sal_Int32 nStartPos
,
60 const Locale
& rLocale
, sal_Int16 nCharacterIteratorMode
, sal_Int32 nCount
, sal_Int32
& nDone
)
61 throw(RuntimeException
)
63 if (nCount
< 0) throw RuntimeException();
65 return LBI
->previousCharacters( Text
, nStartPos
, rLocale
, nCharacterIteratorMode
, nCount
, nDone
);
// True when the code point c is U+200B (ZERO WIDTH SPACE).
// The macro now tests its own (parenthesised) argument; it previously ignored
// the parameter and read whatever variable named `ch` existed at the call
// site, which only worked because every caller happened to pass `ch`.
#define isZWSP(c) ((c) == 0x200B)
// File-local helper that moves nPos across characters treated as skippable
// for the given word type. Each visible case holds a forward loop (bounded by
// len, stepping +1 code point) and a backward loop (bounded by 0, stepping -1
// code point); a loop only commits the new position (nPos = pos) while the
// character just read is skippable.
// NOTE(review): the declarations of ch and pos, the switch header, the test
// that presumably selects the forward or backward loop from bDirection, the
// break statements and the final return are missing from this extract.
70 static sal_Int32
skipSpace(const OUString
& Text
, sal_Int32 nPos
, sal_Int32 len
, sal_Int16 rWordType
, sal_Bool bDirection
)
// ANYWORD_IGNOREWHITESPACES: skip u_isWhitespace() characters and U+200B.
75 case WordType::ANYWORD_IGNOREWHITESPACES
:
77 while (nPos
< len
&& (u_isWhitespace(ch
= Text
.iterateCodePoints(&pos
, 1)) || isZWSP(ch
))) nPos
=pos
;
79 while (nPos
> 0 && (u_isWhitespace(ch
= Text
.iterateCodePoints(&pos
, -1)) || isZWSP(ch
))) nPos
=pos
;
// DICTIONARY_WORD: additionally skip every character that is neither
// U+002E (full stop) nor alphanumeric according to u_isalnum().
81 case WordType::DICTIONARY_WORD
:
83 while (nPos
< len
&& (u_isWhitespace(ch
= Text
.iterateCodePoints(&pos
, 1)) || isZWSP(ch
) ||
84 ! (ch
== 0x002E || u_isalnum(ch
)))) nPos
=pos
;
86 while (nPos
> 0 && (u_isWhitespace(ch
= Text
.iterateCodePoints(&pos
, -1)) || isZWSP(ch
) ||
87 ! (ch
== 0x002E || u_isalnum(ch
)))) nPos
=pos
;
// WORD_COUNT: same shape as the first case but uses u_isUWhiteSpace()
// instead of u_isWhitespace().
89 case WordType::WORD_COUNT
:
91 while (nPos
< len
&& (u_isUWhiteSpace(ch
= Text
.iterateCodePoints(&pos
, 1)) || isZWSP(ch
))) nPos
=pos
;
93 while (nPos
> 0 && (u_isUWhiteSpace(ch
= Text
.iterateCodePoints(&pos
, -1)) || isZWSP(ch
))) nPos
=pos
;
// Finds the word following nStartPos. Visible steps: out-of-range start
// positions collapse the member `result` to an empty boundary at 0 or len;
// otherwise the locale-specific iterator (LBI) supplies the next word, leading
// skippable characters are skipped forward with skipSpace(), and if that moved
// the position the boundary is recomputed at the new position.
// NOTE(review): braces, at least one `else`, and the return statement are
// missing from this extract, so the exact nesting cannot be confirmed here.
99 Boundary SAL_CALL
BreakIteratorImpl::nextWord( const OUString
& Text
, sal_Int32 nStartPos
,
100 const Locale
& rLocale
, sal_Int16 rWordType
) throw(RuntimeException
)
102 sal_Int32 len
= Text
.getLength();
// Negative start or empty text: empty boundary at 0.
103 if( nStartPos
< 0 || len
== 0 )
104 result
.endPos
= result
.startPos
= 0;
// Start at or beyond the end: empty boundary at len.
105 else if (nStartPos
>= len
)
106 result
.endPos
= result
.startPos
= len
;
108 result
= LBI
->nextWord(Text
, nStartPos
, rLocale
, rWordType
);
// Skip forward over skippable characters starting at the reported word start.
110 nStartPos
= skipSpace(Text
, result
.startPos
, len
, rWordType
, sal_True
);
112 if ( nStartPos
!= result
.startPos
) {
// Skipping ran to the end of the text: empty boundary at len.
113 if( nStartPos
>= len
)
114 result
.startPos
= result
.endPos
= len
;
116 result
= LBI
->getWordBoundary(Text
, nStartPos
, rLocale
, rWordType
, sal_True
);
117 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts
118 if (result
.startPos
< nStartPos
) result
.startPos
= nStartPos
;
125 static inline sal_Bool SAL_CALL
isCJK( const Locale
& rLocale
) {
126 return rLocale
.Language
== "zh" || rLocale
.Language
== "ja" || rLocale
.Language
== "ko";
// Finds the word preceding nStartPos. Visible steps: out-of-range start
// positions collapse the member `result` to an empty boundary at 0 or len;
// otherwise skippable characters are skipped backward with skipSpace() and the
// locale-specific iterator (LBI) is asked for the previous word from the
// resulting position.
// NOTE(review): several lines (returns inside the range checks, the body of
// the Asian-script branch, closing braces) are missing from this extract.
129 Boundary SAL_CALL
BreakIteratorImpl::previousWord( const OUString
& Text
, sal_Int32 nStartPos
,
130 const Locale
& rLocale
, sal_Int16 rWordType
) throw(RuntimeException
)
132 sal_Int32 len
= Text
.getLength();
133 if( nStartPos
<= 0 || len
== 0 ) {
134 result
.endPos
= result
.startPos
= 0;
136 } else if (nStartPos
> len
) {
137 result
.endPos
= result
.startPos
= len
;
141 sal_Int32 nPos
= skipSpace(Text
, nStartPos
, len
, rWordType
, sal_False
);
143 // if some spaces are skipped, and the script type is Asian with no CJK rLocale, we have to return
144 // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary.
145 result
.startPos
= nPos
;
// Note: iterateCodePoints(&nPos, -1) also moves nPos one code point back;
// the call below uses result.startPos, which was saved beforehand.
146 if (nPos
!= nStartPos
&& nPos
> 0 && !isCJK(rLocale
) && getScriptClass(Text
.iterateCodePoints(&nPos
, -1)) == ScriptType::ASIAN
) {
151 return LBI
->previousWord(Text
, result
.startPos
, rLocale
, rWordType
);
// Computes the boundary of the word at nPos. Visible steps: degenerate
// positions collapse the member `result` to an empty boundary; otherwise
// skipSpace() is run in both directions from nPos, the three "only skippable
// characters up to a text edge" situations yield empty boundaries, and in the
// remaining case the search direction and position are adjusted before the
// locale-specific iterator (LBI) is consulted.
// NOTE(review): the condition guarding the `= len` assignment, several
// `else` lines, closing braces and the return are missing from this extract.
155 Boundary SAL_CALL
BreakIteratorImpl::getWordBoundary( const OUString
& Text
, sal_Int32 nPos
, const Locale
& rLocale
,
156 sal_Int16 rWordType
, sal_Bool bDirection
) throw(RuntimeException
)
158 sal_Int32 len
= Text
.getLength();
159 if( nPos
< 0 || len
== 0 )
160 result
.endPos
= result
.startPos
= 0;
162 result
.endPos
= result
.startPos
= len
;
// next/prev: where skipSpace() ends up going forward / backward from nPos.
164 sal_Int32 next
, prev
;
165 next
= skipSpace(Text
, nPos
, len
, rWordType
, sal_True
);
166 prev
= skipSpace(Text
, nPos
, len
, rWordType
, sal_False
);
// Skipping reached both ends of the text: empty boundary at nPos.
167 if (prev
== 0 && next
== len
) {
168 result
.endPos
= result
.startPos
= nPos
;
// Backward search and skipping reached the start: empty boundary at 0.
169 } else if (prev
== 0 && ! bDirection
) {
170 result
.endPos
= result
.startPos
= 0;
// Forward search and skipping reached the end: empty boundary at len.
171 } else if (next
== len
&& bDirection
) {
172 result
.endPos
= result
.startPos
= len
;
// If skipping did not move in one direction (and did not hit the text edge),
// that direction overrides the caller's bDirection.
175 if (next
== nPos
&& next
!= len
)
176 bDirection
= sal_True
;
177 else if (prev
== nPos
&& prev
!= 0)
178 bDirection
= sal_False
;
180 nPos
= bDirection
? next
: prev
;
182 result
= LBI
->getWordBoundary(Text
, nPos
, rLocale
, rWordType
, bDirection
);
188 sal_Bool SAL_CALL
BreakIteratorImpl::isBeginWord( const OUString
& Text
, sal_Int32 nPos
,
189 const Locale
& rLocale
, sal_Int16 rWordType
) throw(RuntimeException
)
191 sal_Int32 len
= Text
.getLength();
193 if (nPos
< 0 || nPos
>= len
) return sal_False
;
195 sal_Int32 tmp
= skipSpace(Text
, nPos
, len
, rWordType
, sal_True
);
197 if (tmp
!= nPos
) return sal_False
;
199 result
= getWordBoundary(Text
, nPos
, rLocale
, rWordType
, sal_True
);
201 return result
.startPos
== nPos
;
204 sal_Bool SAL_CALL
BreakIteratorImpl::isEndWord( const OUString
& Text
, sal_Int32 nPos
,
205 const Locale
& rLocale
, sal_Int16 rWordType
) throw(RuntimeException
)
207 sal_Int32 len
= Text
.getLength();
209 if (nPos
<= 0 || nPos
> len
) return sal_False
;
211 sal_Int32 tmp
= skipSpace(Text
, nPos
, len
, rWordType
, sal_False
);
213 if (tmp
!= nPos
) return sal_False
;
215 result
= getWordBoundary(Text
, nPos
, rLocale
, rWordType
, sal_False
);
217 return result
.endPos
== nPos
;
// Returns the start of the sentence containing nStartPos by delegating to the
// locale-specific iterator (LBI); an empty text yields 0 without delegation.
// NOTE(review): the statement executed when nStartPos is outside
// [0, length] is missing from this extract, as are the braces.
220 sal_Int32 SAL_CALL
BreakIteratorImpl::beginOfSentence( const OUString
& Text
, sal_Int32 nStartPos
,
221 const Locale
&rLocale
) throw(RuntimeException
)
223 if (nStartPos
< 0 || nStartPos
> Text
.getLength())
225 if (Text
.isEmpty()) return 0;
226 return LBI
->beginOfSentence(Text
, nStartPos
, rLocale
);
// Returns the end of the sentence containing nStartPos by delegating to the
// locale-specific iterator (LBI); an empty text yields 0 without delegation.
// NOTE(review): the statement executed when nStartPos is outside
// [0, length] is missing from this extract, as are the braces.
229 sal_Int32 SAL_CALL
BreakIteratorImpl::endOfSentence( const OUString
& Text
, sal_Int32 nStartPos
,
230 const Locale
&rLocale
) throw(RuntimeException
)
232 if (nStartPos
< 0 || nStartPos
> Text
.getLength())
234 if (Text
.isEmpty()) return 0;
235 return LBI
->endOfSentence(Text
, nStartPos
, rLocale
);
238 LineBreakResults SAL_CALL
BreakIteratorImpl::getLineBreak( const OUString
& Text
, sal_Int32 nStartPos
,
239 const Locale
& rLocale
, sal_Int32 nMinBreakPos
, const LineBreakHyphenationOptions
& hOptions
,
240 const LineBreakUserOptions
& bOptions
) throw(RuntimeException
)
242 return LBI
->getLineBreak(Text
, nStartPos
, rLocale
, nMinBreakPos
, hOptions
, bOptions
);
245 sal_Int16 SAL_CALL
BreakIteratorImpl::getScriptType( const OUString
& Text
, sal_Int32 nPos
)
246 throw(RuntimeException
)
248 return (nPos
< 0 || nPos
>= Text
.getLength()) ? ScriptType::WEAK
:
249 getScriptClass(Text
.iterateCodePoints(&nPos
, 0));
/** Increments/decrements position first, then obtains character.
    @return current position, may be -1 or text length if string was consumed.
 */
// File-local wrapper around OUString::iterateCodePoints() that steps
// nStartPos by inc and reports the character found in ch.
// NOTE(review): the else line separating the out-of-range branch from the
// normal branch, any assignment to ch in the out-of-range branch, the inner
// condition around the last assignment, and the return are missing from this
// extract.
256 static sal_Int32 SAL_CALL
iterateCodePoints(const OUString
& Text
, sal_Int32
&nStartPos
, sal_Int32 inc
, sal_uInt32
& ch
) {
257 sal_Int32 nLen
= Text
.getLength();
// Stepping would leave [0, nLen): clamp the position to -1 or nLen.
258 if (nStartPos
+ inc
< 0 || nStartPos
+ inc
>= nLen
) {
260 nStartPos
= nStartPos
+ inc
< 0 ? -1 : nLen
;
262 ch
= Text
.iterateCodePoints(&nStartPos
, inc
);
264 // erAck: 2009-06-30T21:52+0200 This logic looks somewhat
265 // suspicious as if it cures a symptom.. anyway, had to add
266 // nStartPos < Text.getLength() to silence the (correct) assertion
267 // in rtl_uString_iterateCodePoints() if Text was one character
268 // (codepoint) only, made up of a surrogate pair.
269 //if (inc > 0 && nStartPos < Text.getLength())
270 // ch = Text.iterateCodePoints(&nStartPos, 0);
271 // With surrogates, nStartPos may actually point behind string
272 // now, even if inc is only +1
// Re-read the character at the new position, or 0 when that position is at or
// past the end of the text.
274 ch
= (nStartPos
< nLen
? Text
.iterateCodePoints(&nStartPos
, 0) : 0);
// Walks backward from nStartPos while characters have the requested script
// class, returning 0 when the start of the text is reached and otherwise the
// position one step forward from where the backward walk stopped.
// NOTE(review): the statements executed by the first two checks, the
// declaration of ch and the braces are missing from this extract.
280 sal_Int32 SAL_CALL
BreakIteratorImpl::beginOfScript( const OUString
& Text
,
281 sal_Int32 nStartPos
, sal_Int16 ScriptType
) throw(RuntimeException
)
283 if (nStartPos
< 0 || nStartPos
>= Text
.getLength())
// The character at nStartPos itself must already be of the requested script.
286 if(ScriptType
!= getScriptClass(Text
.iterateCodePoints(&nStartPos
, 0)))
289 if (nStartPos
== 0) return 0;
291 while (iterateCodePoints(Text
, nStartPos
, -1, ch
) >= 0 && ScriptType
== getScriptClass(ch
)) {
292 if (nStartPos
== 0) return 0;
295 return iterateCodePoints(Text
, nStartPos
, 1, ch
);
// Walks forward from nStartPos over characters of the requested script class;
// the visible loop test singles out a character whose class differs from
// ScriptType and is not WEAK.
// NOTE(review): the statements executed by the checks (including what
// happens when the loop test fires), the declaration of ch, the return and
// the braces are missing from this extract.
298 sal_Int32 SAL_CALL
BreakIteratorImpl::endOfScript( const OUString
& Text
,
299 sal_Int32 nStartPos
, sal_Int16 ScriptType
) throw(RuntimeException
)
301 if (nStartPos
< 0 || nStartPos
>= Text
.getLength())
// The character at nStartPos itself must already be of the requested script.
304 if(ScriptType
!= getScriptClass(Text
.iterateCodePoints(&nStartPos
, 0)))
307 sal_Int32 strLen
= Text
.getLength();
309 while(iterateCodePoints(Text
, nStartPos
, 1, ch
) < strLen
) {
310 sal_Int16 currentCharScriptType
= getScriptClass(ch
);
311 if(ScriptType
!= currentCharScriptType
&& currentCharScriptType
!= ScriptType::WEAK
)
// Searches backward from nStartPos for the previous run of the requested
// script class. numberOfChange counts the script transitions still to be
// crossed: it starts at 3 when the character at nStartPos already has the
// requested class and at 2 otherwise. The result is -1 unless the counter
// reached 0.
// NOTE(review): the statements that modify numberOfChange, the
// declaration of ch, an initial check (original lines 320-321) and the braces
// are missing from this extract.
317 sal_Int32 SAL_CALL
BreakIteratorImpl::previousScript( const OUString
& Text
,
318 sal_Int32 nStartPos
, sal_Int16 ScriptType
) throw(RuntimeException
)
// Clamp a start position beyond the end of the text to the text length.
322 if (nStartPos
> Text
.getLength())
323 nStartPos
= Text
.getLength();
325 sal_Int16 numberOfChange
= (ScriptType
== getScriptClass(Text
.iterateCodePoints(&nStartPos
, 0))) ? 3 : 2;
328 while (numberOfChange
> 0 && iterateCodePoints(Text
, nStartPos
, -1, ch
) >= 0) {
// The XOR fires when the parity of the counter and "character is not of the
// requested script" disagree.
329 if ((((numberOfChange
% 2) == 0) ^ (ScriptType
!= getScriptClass(ch
))))
331 else if (nStartPos
== 0) {
332 if (numberOfChange
> 0)
335 Text
.iterateCodePoints(&nStartPos
, -1);
340 return numberOfChange
== 0 ? iterateCodePoints(Text
, nStartPos
, 1, ch
) : -1;
// Searches forward from nStartPos for the next run of the requested script
// class. numberOfChange counts the script transitions still to be crossed: it
// starts at 2 when the character at nStartPos already has the requested class
// and at 1 otherwise. Returns nStartPos when the counter reached 0, else -1.
// NOTE(review): the statements that modify numberOfChange, the statement
// executed when nStartPos >= strLen, the declaration of ch, an initial check
// (original lines 346-348) and the braces are missing from this extract.
343 sal_Int32 SAL_CALL
BreakIteratorImpl::nextScript( const OUString
& Text
, sal_Int32 nStartPos
,
344 sal_Int16 ScriptType
) throw(RuntimeException
)
349 sal_Int32 strLen
= Text
.getLength();
350 if (nStartPos
>= strLen
)
353 sal_Int16 numberOfChange
= (ScriptType
== getScriptClass(Text
.iterateCodePoints(&nStartPos
, 0))) ? 2 : 1;
356 while (numberOfChange
> 0 && iterateCodePoints(Text
, nStartPos
, 1, ch
) < strLen
) {
357 sal_Int16 currentCharScriptType
= getScriptClass(ch
);
// With one change left the test looks for the requested script; otherwise it
// looks for a character that is neither the requested script nor WEAK.
358 if ((numberOfChange
== 1) ? (ScriptType
== currentCharScriptType
) :
359 (ScriptType
!= currentCharScriptType
&& currentCharScriptType
!= ScriptType::WEAK
))
362 return numberOfChange
== 0 ? nStartPos
: -1;
365 sal_Int32 SAL_CALL
BreakIteratorImpl::beginOfCharBlock( const OUString
& Text
, sal_Int32 nStartPos
,
366 const Locale
& /*rLocale*/, sal_Int16 CharType
) throw(RuntimeException
)
368 if (CharType
== CharType::ANY_CHAR
) return 0;
369 if (nStartPos
< 0 || nStartPos
>= Text
.getLength()) return -1;
370 if (CharType
!= (sal_Int16
)u_charType( Text
.iterateCodePoints(&nStartPos
, 0))) return -1;
372 sal_Int32 nPos
=nStartPos
;
373 while(nStartPos
> 0 && CharType
== (sal_Int16
)u_charType(Text
.iterateCodePoints(&nPos
, -1))) { nStartPos
=nPos
; }
374 return nStartPos
; // begin of char block is inclusive
// Returns the (exclusive) end of the run of characters of type CharType that
// contains nStartPos: the text length for CharType::ANY_CHAR, -1 when
// nStartPos is outside [0, length) or the character there has another type.
// NOTE(review): the declaration of ch and the braces are missing from this
// extract.
377 sal_Int32 SAL_CALL
BreakIteratorImpl::endOfCharBlock( const OUString
& Text
, sal_Int32 nStartPos
,
378 const Locale
& /*rLocale*/, sal_Int16 CharType
) throw(RuntimeException
)
380 sal_Int32 strLen
= Text
.getLength();
382 if (CharType
== CharType::ANY_CHAR
) return strLen
; // end of char block is exclusive
383 if (nStartPos
< 0 || nStartPos
>= strLen
) return -1;
384 if (CharType
!= (sal_Int16
)u_charType(Text
.iterateCodePoints(&nStartPos
, 0))) return -1;
// Empty-bodied loop: iterateCodePoints() advances nStartPos as a side effect
// until the end of the text or a character of a different type is reached.
387 while(iterateCodePoints(Text
, nStartPos
, 1, ch
) < strLen
&& CharType
== (sal_Int16
)u_charType(ch
)) {}
388 return nStartPos
; // end of char block is exclusive
// Searches forward from nStartPos for the next run of characters of type
// CharType. Returns -1 for CharType::ANY_CHAR and for start positions outside
// [0, length). numberOfChange counts the type transitions still to be
// crossed (2 when the character at nStartPos already has the type, else 1);
// the result is nStartPos when the counter reached 0, else -1.
// NOTE(review): the statement that modifies numberOfChange, the
// declaration of ch and the braces are missing from this extract.
391 sal_Int32 SAL_CALL
BreakIteratorImpl::nextCharBlock( const OUString
& Text
, sal_Int32 nStartPos
,
392 const Locale
& /*rLocale*/, sal_Int16 CharType
) throw(RuntimeException
)
394 if (CharType
== CharType::ANY_CHAR
) return -1;
395 if (nStartPos
< 0 || nStartPos
>= Text
.getLength()) return -1;
397 sal_Int16 numberOfChange
= (CharType
== (sal_Int16
)u_charType(Text
.iterateCodePoints(&nStartPos
, 0))) ? 2 : 1;
398 sal_Int32 strLen
= Text
.getLength();
401 while (numberOfChange
> 0 && iterateCodePoints(Text
, nStartPos
, 1, ch
) < strLen
) {
// Fires when "type differs" and "exactly one change left" disagree.
402 if ((CharType
!= (sal_Int16
)u_charType(ch
)) ^ (numberOfChange
== 1))
405 return numberOfChange
== 0 ? nStartPos
: -1;
// Searches backward from nStartPos for the previous run of characters of type
// CharType. Returns -1 for CharType::ANY_CHAR and for start positions outside
// [0, length). numberOfChange counts the type transitions still to be
// crossed (3 when the character at nStartPos already has the type, else 2);
// the result is -1 unless the counter reached 0.
// NOTE(review): the statements that modify numberOfChange, the
// declaration of ch and the braces are missing from this extract.
408 sal_Int32 SAL_CALL
BreakIteratorImpl::previousCharBlock( const OUString
& Text
, sal_Int32 nStartPos
,
409 const Locale
& /*rLocale*/, sal_Int16 CharType
) throw(RuntimeException
)
411 if(CharType
== CharType::ANY_CHAR
) return -1;
412 if (nStartPos
< 0 || nStartPos
>= Text
.getLength()) return -1;
414 sal_Int16 numberOfChange
= (CharType
== (sal_Int16
)u_charType(Text
.iterateCodePoints(&nStartPos
, 0))) ? 3 : 2;
417 while (numberOfChange
> 0 && iterateCodePoints(Text
, nStartPos
, -1, ch
) >= 0) {
// Fires when the parity of the counter and "type differs" disagree.
418 if (((numberOfChange
% 2) == 0) ^ (CharType
!= (sal_Int16
)u_charType(ch
)))
// Special handling once the walk has reached the start of the text.
420 if (nStartPos
== 0 && numberOfChange
> 0) {
422 if (numberOfChange
== 0) return nStartPos
;
425 return numberOfChange
== 0 ? iterateCodePoints(Text
, nStartPos
, 1, ch
) : -1;
// getWordType: all three parameters are unnamed (commented out), so the
// result cannot depend on the text, position or locale.
// NOTE(review): the function body is not visible in this extract.
430 sal_Int16 SAL_CALL
BreakIteratorImpl::getWordType( const OUString
& /*Text*/,
431 sal_Int32
/*nPos*/, const Locale
& /*rLocale*/ ) throw(RuntimeException
)
438 sal_Int16
getScriptClassByUAX24Script(sal_uInt32 currentChar
)
440 int32_t script
= u_getIntPropertyValue(currentChar
, UCHAR_SCRIPT
);
441 return unicode::getScriptClassFromUScriptCode(static_cast<UScriptCode
>(script
));
// Table mapping ranges of Unicode blocks to script classes. Each entry is
// {first block, last block, script class}; the lookup code in this file reads
// the fields as .from, .to and .script and relies on the entries being in
// ascending block order (it stops at the first entry whose .to is not below
// the block searched for).
// NOTE(review): the UBlock2Script struct definition and the braces that
// open and close the initialiser are missing from this extract.
451 static const UBlock2Script scriptList
[] =
453 {UBLOCK_NO_BLOCK
, UBLOCK_NO_BLOCK
, ScriptType::WEAK
},
454 {UBLOCK_BASIC_LATIN
, UBLOCK_ARMENIAN
, ScriptType::LATIN
},
455 {UBLOCK_HEBREW
, UBLOCK_MYANMAR
, ScriptType::COMPLEX
},
456 {UBLOCK_GEORGIAN
, UBLOCK_GEORGIAN
, ScriptType::LATIN
},
457 {UBLOCK_HANGUL_JAMO
, UBLOCK_HANGUL_JAMO
, ScriptType::ASIAN
},
458 {UBLOCK_ETHIOPIC
, UBLOCK_ETHIOPIC
, ScriptType::COMPLEX
},
459 {UBLOCK_CHEROKEE
, UBLOCK_RUNIC
, ScriptType::LATIN
},
460 {UBLOCK_KHMER
, UBLOCK_MONGOLIAN
, ScriptType::COMPLEX
},
461 {UBLOCK_LATIN_EXTENDED_ADDITIONAL
, UBLOCK_GREEK_EXTENDED
, ScriptType::LATIN
},
462 {UBLOCK_NUMBER_FORMS
, UBLOCK_NUMBER_FORMS
, ScriptType::WEAK
},
463 {UBLOCK_CJK_RADICALS_SUPPLEMENT
, UBLOCK_HANGUL_SYLLABLES
, ScriptType::ASIAN
},
464 {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS
, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS
, ScriptType::ASIAN
},
465 {UBLOCK_ARABIC_PRESENTATION_FORMS_A
, UBLOCK_ARABIC_PRESENTATION_FORMS_A
, ScriptType::COMPLEX
},
466 {UBLOCK_CJK_COMPATIBILITY_FORMS
, UBLOCK_CJK_COMPATIBILITY_FORMS
, ScriptType::ASIAN
},
467 {UBLOCK_ARABIC_PRESENTATION_FORMS_B
, UBLOCK_ARABIC_PRESENTATION_FORMS_B
, ScriptType::COMPLEX
},
468 {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS
, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS
, ScriptType::ASIAN
},
469 {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
, ScriptType::ASIAN
},
470 {UBLOCK_CJK_STROKES
, UBLOCK_CJK_STROKES
, ScriptType::ASIAN
},
471 {UBLOCK_LATIN_EXTENDED_C
, UBLOCK_LATIN_EXTENDED_D
, ScriptType::LATIN
}
// Number of entries in scriptList.
474 #define scriptListCount SAL_N_ELEMENTS(scriptList)
// Legacy, block-based script classification. A handful of code points are
// forced to WEAK, the range 0x2C80..0x2CE3 is forced to LATIN, and everything
// else is looked up by Unicode block in scriptList, falling back to WEAK.
// NOTE(review): parts of the original header comment (the lines between
// the three comment lines below), the declaration of the loop index i, the
// loop body, the else lines and the return statements are missing from this
// extract, so the exact true/false result per branch cannot be confirmed here.
476 //always sets rScriptType
478 //returns true for characters historically explicitly assigned to
481 //returns false for characters that historically implicitly assigned to
483 bool getCompatibilityScriptClassByBlock(sal_uInt32 currentChar
, sal_Int16
&rScriptType
)
486 //handle specific characters always as weak:
487 // 0x01 - this breaks a word
488 // 0x02 - this can be inside a word
489 // 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
490 if( 0x01 == currentChar
|| 0x02 == currentChar
|| 0x20 == currentChar
|| 0xA0 == currentChar
)
491 rScriptType
= ScriptType::WEAK
;
492 // workaround for Coptic
493 else if ( 0x2C80 <= currentChar
&& 0x2CE3 >= currentChar
)
494 rScriptType
= ScriptType::LATIN
;
497 UBlockCode block
=ublock_getCode(currentChar
);
// Find the first table entry whose upper bound is not below the block.
499 while (i
< scriptListCount
)
501 if (block
<= scriptList
[i
].to
)
// The entry only applies if the block is also at or above its lower bound.
505 if (i
< scriptListCount
&& block
>= scriptList
[i
].from
)
506 rScriptType
= scriptList
[i
].script
;
509 rScriptType
= ScriptType::WEAK
;
// Returns the script class of a code point. The last queried code point and
// its result are remembered in function-local statics, so the lookup is only
// redone when currentChar differs from the previous call. The block-based
// compatibility lookup is tried first; the UAX#24 script property is used
// when that lookup returns false.
// NOTE(review): the two statics are shared by all callers and are read and
// written without any visible synchronisation — confirm callers are
// single-threaded. The braces and the return are missing from this extract.
517 sal_Int16
BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar
)
519 static sal_uInt32 lastChar
= 0;
520 static sal_Int16 nRet
= 0;
522 if (currentChar
!= lastChar
)
524 lastChar
= currentChar
;
526 if (!getCompatibilityScriptClassByBlock(currentChar
, nRet
))
527 nRet
= getScriptClassByUAX24Script(currentChar
);
533 static inline sal_Bool
operator == (const Locale
& l1
, const Locale
& l2
) {
534 return l1
.Language
== l2
.Language
&& l1
.Country
== l2
.Country
&& l1
.Variant
== l2
.Variant
;
// Tries to obtain the break-iterator service for aLocaleName. Visible steps:
// first look for a cached lookupTable entry whose Language equals
// aLocaleName; otherwise instantiate the service
// "com.sun.star.i18n.BreakIterator_" + aLocaleName through the component
// context's service manager, store it in the member xBI and append it to
// lookupTable under a Locale whose three fields are all aLocaleName.
// NOTE(review): the statements executed on a cache hit, the check that the
// created instance is valid, the return statements and the braces are missing
// from this extract.
537 sal_Bool SAL_CALL
BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString
& aLocaleName
) throw( RuntimeException
)
539 // to share service between same Language but different Country code, like zh_CN and zh_TW
540 for (size_t l
= 0; l
< lookupTable
.size(); l
++) {
541 lookupTableItem
*listItem
= lookupTable
[l
];
542 if (aLocaleName
== listItem
->aLocale
.Language
) {
548 Reference
< uno::XInterface
> xI
= m_xContext
->getServiceManager()->createInstanceWithContext(
549 OUString("com.sun.star.i18n.BreakIterator_") + aLocaleName
, m_xContext
);
552 xBI
.set(xI
, UNO_QUERY
);
554 lookupTable
.push_back(new lookupTableItem(Locale(aLocaleName
, aLocaleName
, aLocaleName
), xBI
));
// Returns the break iterator to use for rLocale. Visible steps: reuse the
// current xBI when it is set and rLocale equals the member aLocale; otherwise
// (when a component context exists) search lookupTable for an exact locale
// match; otherwise try service names from most to least specific, as listed
// in the inline comments below, and cache the winner in lookupTable. A
// RuntimeException is thrown on a path whose condition is not visible here.
// NOTE(review): the statement executed on the first fast path, any
// assignment to aLocale, one line of the fallback condition (the parentheses
// do not balance in this extract), the returns and the braces are missing
// from this extract.
561 Reference
< XBreakIterator
> SAL_CALL
562 BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale
& rLocale
) throw (RuntimeException
)
564 if (xBI
.is() && rLocale
== aLocale
)
566 else if (m_xContext
.is()) {
569 for (size_t i
= 0; i
< lookupTable
.size(); i
++) {
570 lookupTableItem
*listItem
= lookupTable
[i
];
571 if (rLocale
== listItem
->aLocale
)
572 return xBI
= listItem
->xBI
;
// Separator placed between the locale parts of a service name.
575 sal_Unicode under
= (sal_Unicode
)'_';
577 sal_Int32 l
= rLocale
.Language
.getLength();
578 sal_Int32 c
= rLocale
.Country
.getLength();
579 sal_Int32 v
= rLocale
.Variant
.getLength();
// Buffer capacity: the three parts plus 3 extra characters.
580 OUStringBuffer
aBuf(l
+c
+v
+3);
582 if ((l
> 0 && c
> 0 && v
> 0 &&
583 // load service with name <base>_<lang>_<country>_<variant>
584 createLocaleSpecificBreakIterator(aBuf
.append(rLocale
.Language
).append(under
).append(
585 rLocale
.Country
).append(under
).append(rLocale
.Variant
).makeStringAndClear())) ||
587 // load service with name <base>_<lang>_<country>
588 createLocaleSpecificBreakIterator(aBuf
.append(rLocale
.Language
).append(under
).append(
589 rLocale
.Country
).makeStringAndClear())) ||
590 (l
> 0 && c
> 0 && rLocale
.Language
.equalsAscii("zh") &&
591 (rLocale
.Country
.equalsAscii("HK") ||
592 rLocale
.Country
.equalsAscii("MO") ) &&
593 // if the country code is HK or MO, one more step to try TW.
594 createLocaleSpecificBreakIterator(aBuf
.append(rLocale
.Language
).append(under
).appendAscii(
595 "TW").makeStringAndClear())) ||
597 // load service with name <base>_<lang>
598 createLocaleSpecificBreakIterator(rLocale
.Language
)) ||
599 // load default service with name <base>_Unicode
600 createLocaleSpecificBreakIterator(OUString("Unicode"))) {
601 lookupTable
.push_back( new lookupTableItem(aLocale
, xBI
) );
605 throw RuntimeException();
// ASCII name used by the service-info methods below, both as the
// implementation name and as the single supported service name.
608 const sal_Char cBreakIterator
[] = "com.sun.star.i18n.BreakIterator";
// Returns cBreakIterator converted to an OUString.
// NOTE(review): the return-type line and the braces are missing from this
// extract.
611 BreakIteratorImpl::getImplementationName(void) throw( RuntimeException
)
613 return OUString::createFromAscii(cBreakIterator
);
// Reports whether rServiceName equals cBreakIterator (ASCII comparison).
// NOTE(review): the return-type line and the braces are missing from this
// extract.
617 BreakIteratorImpl::supportsService(const OUString
& rServiceName
) throw( RuntimeException
)
619 return rServiceName
.equalsAscii(cBreakIterator
);
// Builds a one-element sequence whose only entry is cBreakIterator.
// NOTE(review): the return statement and the braces are missing from this
// extract.
622 Sequence
< OUString
> SAL_CALL
623 BreakIteratorImpl::getSupportedServiceNames(void) throw( RuntimeException
)
625 Sequence
< OUString
> aRet(1);
626 aRet
[0] = OUString::createFromAscii(cBreakIterator
);
632 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */