Version 4.0.0.1, tag libreoffice-4.0.0.1
[LibreOffice.git] / i18npool / source / breakiterator / breakiteratorImpl.cxx
blob4f2207a8441371a32b22bc597cac3d21087577dd
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <breakiteratorImpl.hxx>
22 #include <unicode/uchar.h>
23 #include <i18nutil/unicode.hxx>
24 #include <rtl/ustrbuf.hxx>
26 using namespace ::com::sun::star::uno;
27 using namespace ::com::sun::star::lang;
28 using namespace ::rtl;
30 namespace com { namespace sun { namespace star { namespace i18n {
32 BreakIteratorImpl::BreakIteratorImpl( const Reference < XComponentContext >& rxContext ) : m_xContext( rxContext )
36 BreakIteratorImpl::BreakIteratorImpl()
40 BreakIteratorImpl::~BreakIteratorImpl()
42 // Clear lookuptable
43 for (size_t l = 0; l < lookupTable.size(); l++)
44 delete lookupTable[l];
45 lookupTable.clear();
// Shorthand for the locale-specific break iterator: expands to a call of
// getLocaleSpecificBreakIterator() on a variable named rLocale, which therefore
// has to be in scope wherever LBI is used.
48 #define LBI getLocaleSpecificBreakIterator(rLocale)
50 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharacters( const OUString& Text, sal_Int32 nStartPos,
51 const Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
52 throw(RuntimeException)
54 if (nCount < 0) throw RuntimeException();
56 return LBI->nextCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
59 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharacters( const OUString& Text, sal_Int32 nStartPos,
60 const Locale& rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
61 throw(RuntimeException)
63 if (nCount < 0) throw RuntimeException();
65 return LBI->previousCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
// True if code point c is U+200B ZERO WIDTH SPACE.
// The argument itself is tested (and parenthesised), so the macro no longer depends on a
// variable that happens to be called `ch` at the point of expansion.
#define isZWSP(c) ((c) == 0x200B)
70 static sal_Int32 skipSpace(const OUString& Text, sal_Int32 nPos, sal_Int32 len, sal_Int16 rWordType, sal_Bool bDirection)
72 sal_uInt32 ch=0;
73 sal_Int32 pos=nPos;
74 switch (rWordType) {
75 case WordType::ANYWORD_IGNOREWHITESPACES:
76 if (bDirection)
77 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos;
78 else
79 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
80 break;
81 case WordType::DICTIONARY_WORD:
82 if (bDirection)
83 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch) ||
84 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
85 else
86 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch) ||
87 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
88 break;
89 case WordType::WORD_COUNT:
90 if (bDirection)
91 while (nPos < len && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos;
92 else
93 while (nPos > 0 && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
94 break;
96 return nPos;
99 Boundary SAL_CALL BreakIteratorImpl::nextWord( const OUString& Text, sal_Int32 nStartPos,
100 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException)
102 sal_Int32 len = Text.getLength();
103 if( nStartPos < 0 || len == 0 )
104 result.endPos = result.startPos = 0;
105 else if (nStartPos >= len)
106 result.endPos = result.startPos = len;
107 else {
108 result = LBI->nextWord(Text, nStartPos, rLocale, rWordType);
110 nStartPos = skipSpace(Text, result.startPos, len, rWordType, sal_True);
112 if ( nStartPos != result.startPos) {
113 if( nStartPos >= len )
114 result.startPos = result.endPos = len;
115 else {
116 result = LBI->getWordBoundary(Text, nStartPos, rLocale, rWordType, sal_True);
117 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts
118 if (result.startPos < nStartPos) result.startPos = nStartPos;
122 return result;
125 static inline sal_Bool SAL_CALL isCJK( const Locale& rLocale ) {
126 return rLocale.Language == "zh" || rLocale.Language == "ja" || rLocale.Language == "ko";
129 Boundary SAL_CALL BreakIteratorImpl::previousWord( const OUString& Text, sal_Int32 nStartPos,
130 const Locale& rLocale, sal_Int16 rWordType) throw(RuntimeException)
132 sal_Int32 len = Text.getLength();
133 if( nStartPos <= 0 || len == 0 ) {
134 result.endPos = result.startPos = 0;
135 return result;
136 } else if (nStartPos > len) {
137 result.endPos = result.startPos = len;
138 return result;
141 sal_Int32 nPos = skipSpace(Text, nStartPos, len, rWordType, sal_False);
143 // if some spaces are skiped, and the script type is Asian with no CJK rLocale, we have to return
144 // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary.
145 result.startPos = nPos;
146 if (nPos != nStartPos && nPos > 0 && !isCJK(rLocale) && getScriptClass(Text.iterateCodePoints(&nPos, -1)) == ScriptType::ASIAN) {
147 result.endPos = -1;
148 return result;
151 return LBI->previousWord(Text, result.startPos, rLocale, rWordType);
155 Boundary SAL_CALL BreakIteratorImpl::getWordBoundary( const OUString& Text, sal_Int32 nPos, const Locale& rLocale,
156 sal_Int16 rWordType, sal_Bool bDirection ) throw(RuntimeException)
158 sal_Int32 len = Text.getLength();
159 if( nPos < 0 || len == 0 )
160 result.endPos = result.startPos = 0;
161 else if (nPos > len)
162 result.endPos = result.startPos = len;
163 else {
164 sal_Int32 next, prev;
165 next = skipSpace(Text, nPos, len, rWordType, sal_True);
166 prev = skipSpace(Text, nPos, len, rWordType, sal_False);
167 if (prev == 0 && next == len) {
168 result.endPos = result.startPos = nPos;
169 } else if (prev == 0 && ! bDirection) {
170 result.endPos = result.startPos = 0;
171 } else if (next == len && bDirection) {
172 result.endPos = result.startPos = len;
173 } else {
174 if (next != prev) {
175 if (next == nPos && next != len)
176 bDirection = sal_True;
177 else if (prev == nPos && prev != 0)
178 bDirection = sal_False;
179 else
180 nPos = bDirection ? next : prev;
182 result = LBI->getWordBoundary(Text, nPos, rLocale, rWordType, bDirection);
185 return result;
188 sal_Bool SAL_CALL BreakIteratorImpl::isBeginWord( const OUString& Text, sal_Int32 nPos,
189 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException)
191 sal_Int32 len = Text.getLength();
193 if (nPos < 0 || nPos >= len) return sal_False;
195 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, sal_True);
197 if (tmp != nPos) return sal_False;
199 result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_True);
201 return result.startPos == nPos;
204 sal_Bool SAL_CALL BreakIteratorImpl::isEndWord( const OUString& Text, sal_Int32 nPos,
205 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException)
207 sal_Int32 len = Text.getLength();
209 if (nPos <= 0 || nPos > len) return sal_False;
211 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, sal_False);
213 if (tmp != nPos) return sal_False;
215 result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_False);
217 return result.endPos == nPos;
220 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
221 const Locale &rLocale ) throw(RuntimeException)
223 if (nStartPos < 0 || nStartPos > Text.getLength())
224 return -1;
225 if (Text.isEmpty()) return 0;
226 return LBI->beginOfSentence(Text, nStartPos, rLocale);
229 sal_Int32 SAL_CALL BreakIteratorImpl::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
230 const Locale &rLocale ) throw(RuntimeException)
232 if (nStartPos < 0 || nStartPos > Text.getLength())
233 return -1;
234 if (Text.isEmpty()) return 0;
235 return LBI->endOfSentence(Text, nStartPos, rLocale);
238 LineBreakResults SAL_CALL BreakIteratorImpl::getLineBreak( const OUString& Text, sal_Int32 nStartPos,
239 const Locale& rLocale, sal_Int32 nMinBreakPos, const LineBreakHyphenationOptions& hOptions,
240 const LineBreakUserOptions& bOptions ) throw(RuntimeException)
242 return LBI->getLineBreak(Text, nStartPos, rLocale, nMinBreakPos, hOptions, bOptions);
245 sal_Int16 SAL_CALL BreakIteratorImpl::getScriptType( const OUString& Text, sal_Int32 nPos )
246 throw(RuntimeException)
248 return (nPos < 0 || nPos >= Text.getLength()) ? ScriptType::WEAK :
249 getScriptClass(Text.iterateCodePoints(&nPos, 0));
253 /** Increments/decrements position first, then obtains character.
254 @return current position, may be -1 or text length if string was consumed.
256 static sal_Int32 SAL_CALL iterateCodePoints(const OUString& Text, sal_Int32 &nStartPos, sal_Int32 inc, sal_uInt32& ch) {
257 sal_Int32 nLen = Text.getLength();
258 if (nStartPos + inc < 0 || nStartPos + inc >= nLen) {
259 ch = 0;
260 nStartPos = nStartPos + inc < 0 ? -1 : nLen;
261 } else {
262 ch = Text.iterateCodePoints(&nStartPos, inc);
263 // Fix for #i80436#.
264 // erAck: 2009-06-30T21:52+0200 This logic looks somewhat
265 // suspicious as if it cures a symptom.. anyway, had to add
266 // nStartPos < Text.getLength() to silence the (correct) assertion
267 // in rtl_uString_iterateCodePoints() if Text was one character
268 // (codepoint) only, made up of a surrogate pair.
269 //if (inc > 0 && nStartPos < Text.getLength())
270 // ch = Text.iterateCodePoints(&nStartPos, 0);
271 // With surrogates, nStartPos may actually point behind string
272 // now, even if inc is only +1
273 if (inc > 0)
274 ch = (nStartPos < nLen ? Text.iterateCodePoints(&nStartPos, 0) : 0);
276 return nStartPos;
280 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfScript( const OUString& Text,
281 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException)
283 if (nStartPos < 0 || nStartPos >= Text.getLength())
284 return -1;
286 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
287 return -1;
289 if (nStartPos == 0) return 0;
290 sal_uInt32 ch=0;
291 while (iterateCodePoints(Text, nStartPos, -1, ch) >= 0 && ScriptType == getScriptClass(ch)) {
292 if (nStartPos == 0) return 0;
295 return iterateCodePoints(Text, nStartPos, 1, ch);
298 sal_Int32 SAL_CALL BreakIteratorImpl::endOfScript( const OUString& Text,
299 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException)
301 if (nStartPos < 0 || nStartPos >= Text.getLength())
302 return -1;
304 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
305 return -1;
307 sal_Int32 strLen = Text.getLength();
308 sal_uInt32 ch=0;
309 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) {
310 sal_Int16 currentCharScriptType = getScriptClass(ch);
311 if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK)
312 break;
314 return nStartPos;
317 sal_Int32 SAL_CALL BreakIteratorImpl::previousScript( const OUString& Text,
318 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException)
320 if (nStartPos < 0)
321 return -1;
322 if (nStartPos > Text.getLength())
323 nStartPos = Text.getLength();
325 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
327 sal_uInt32 ch=0;
328 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
329 if ((((numberOfChange % 2) == 0) ^ (ScriptType != getScriptClass(ch))))
330 numberOfChange--;
331 else if (nStartPos == 0) {
332 if (numberOfChange > 0)
333 numberOfChange--;
334 if (nStartPos > 0)
335 Text.iterateCodePoints(&nStartPos, -1);
336 else
337 return -1;
340 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
343 sal_Int32 SAL_CALL BreakIteratorImpl::nextScript( const OUString& Text, sal_Int32 nStartPos,
344 sal_Int16 ScriptType ) throw(RuntimeException)
347 if (nStartPos < 0)
348 nStartPos = 0;
349 sal_Int32 strLen = Text.getLength();
350 if (nStartPos > strLen)
351 return -1;
353 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
355 sal_uInt32 ch=0;
356 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
357 sal_Int16 currentCharScriptType = getScriptClass(ch);
358 if ((numberOfChange == 1) ? (ScriptType == currentCharScriptType) :
359 (ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK))
360 numberOfChange--;
362 return numberOfChange == 0 ? nStartPos : -1;
365 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
366 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException)
368 if (CharType == CharType::ANY_CHAR) return 0;
369 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
370 if (CharType != (sal_Int16)u_charType( Text.iterateCodePoints(&nStartPos, 0))) return -1;
372 sal_Int32 nPos=nStartPos;
373 while(nStartPos > 0 && CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nPos, -1))) { nStartPos=nPos; }
374 return nStartPos; // begin of char block is inclusive
377 sal_Int32 SAL_CALL BreakIteratorImpl::endOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
378 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException)
380 sal_Int32 strLen = Text.getLength();
382 if (CharType == CharType::ANY_CHAR) return strLen; // end of char block is exclusive
383 if (nStartPos < 0 || nStartPos >= strLen) return -1;
384 if (CharType != (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) return -1;
386 sal_uInt32 ch=0;
387 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen && CharType == (sal_Int16)u_charType(ch)) {}
388 return nStartPos; // end of char block is exclusive
391 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharBlock( const OUString& Text, sal_Int32 nStartPos,
392 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException)
394 if (CharType == CharType::ANY_CHAR) return -1;
395 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
397 sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
398 sal_Int32 strLen = Text.getLength();
400 sal_uInt32 ch=0;
401 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
402 if ((CharType != (sal_Int16)u_charType(ch)) ^ (numberOfChange == 1))
403 numberOfChange--;
405 return numberOfChange == 0 ? nStartPos : -1;
408 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharBlock( const OUString& Text, sal_Int32 nStartPos,
409 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException)
411 if(CharType == CharType::ANY_CHAR) return -1;
412 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
414 sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
416 sal_uInt32 ch=0;
417 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
418 if (((numberOfChange % 2) == 0) ^ (CharType != (sal_Int16)u_charType(ch)))
419 numberOfChange--;
420 if (nStartPos == 0 && numberOfChange > 0) {
421 numberOfChange--;
422 if (numberOfChange == 0) return nStartPos;
425 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
430 sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/,
431 sal_Int32 /*nPos*/, const Locale& /*rLocale*/ ) throw(RuntimeException)
433 return 0;
436 namespace
438 sal_Int16 getScriptClassByUAX24Script(sal_uInt32 currentChar)
440 int32_t script = u_getIntPropertyValue(currentChar, UCHAR_SCRIPT);
441 return unicode::getScriptClassFromUScriptCode(static_cast<UScriptCode>(script));
444 struct UBlock2Script
446 UBlockCode from;
447 UBlockCode to;
448 sal_Int16 script;
451 static UBlock2Script scriptList[] =
453 {UBLOCK_NO_BLOCK, UBLOCK_NO_BLOCK, ScriptType::WEAK},
454 {UBLOCK_BASIC_LATIN, UBLOCK_ARMENIAN, ScriptType::LATIN},
455 {UBLOCK_HEBREW, UBLOCK_MYANMAR, ScriptType::COMPLEX},
456 {UBLOCK_GEORGIAN, UBLOCK_GEORGIAN, ScriptType::LATIN},
457 {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO, ScriptType::ASIAN},
458 {UBLOCK_ETHIOPIC, UBLOCK_ETHIOPIC, ScriptType::COMPLEX},
459 {UBLOCK_CHEROKEE, UBLOCK_RUNIC, ScriptType::LATIN},
460 {UBLOCK_KHMER, UBLOCK_MONGOLIAN, ScriptType::COMPLEX},
461 {UBLOCK_LATIN_EXTENDED_ADDITIONAL, UBLOCK_GREEK_EXTENDED, ScriptType::LATIN},
462 {UBLOCK_NUMBER_FORMS, UBLOCK_NUMBER_FORMS, ScriptType::WEAK},
463 {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES, ScriptType::ASIAN},
464 {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, ScriptType::ASIAN},
465 {UBLOCK_ARABIC_PRESENTATION_FORMS_A, UBLOCK_ARABIC_PRESENTATION_FORMS_A, ScriptType::COMPLEX},
466 {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS, ScriptType::ASIAN},
467 {UBLOCK_ARABIC_PRESENTATION_FORMS_B, UBLOCK_ARABIC_PRESENTATION_FORMS_B, ScriptType::COMPLEX},
468 {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, ScriptType::ASIAN},
469 {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, ScriptType::ASIAN},
470 {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES, ScriptType::ASIAN},
471 {UBLOCK_LATIN_EXTENDED_C, UBLOCK_LATIN_EXTENDED_D, ScriptType::LATIN}
474 #define scriptListCount SAL_N_ELEMENTS(scriptList)
476 //always sets rScriptType
478 //returns true for characters historically explicitly assigned to
479 //latin/weak/asian
481 //returns false for characters that historically implicitly assigned to
482 //weak as unknown
483 bool getCompatibilityScriptClassByBlock(sal_uInt32 currentChar, sal_Int16 &rScriptType)
485 bool bKnown = true;
486 //handle specific characters always as weak:
487 // 0x01 - this breaks a word
488 // 0x02 - this can be inside a word
489 // 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
490 if( 0x01 == currentChar || 0x02 == currentChar || 0x20 == currentChar || 0xA0 == currentChar)
491 rScriptType = ScriptType::WEAK;
492 // workaround for Coptic
493 else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar)
494 rScriptType = ScriptType::LATIN;
495 else
497 UBlockCode block=ublock_getCode(currentChar);
498 size_t i = 0;
499 while (i < scriptListCount)
501 if (block <= scriptList[i].to)
502 break;
503 ++i;
505 if (i < scriptListCount && block >= scriptList[i].from)
506 rScriptType = scriptList[i].script;
507 else
509 rScriptType = ScriptType::WEAK;
510 bKnown = false;
513 return bKnown;
517 sal_Int16 BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar)
519 static sal_uInt32 lastChar = 0;
520 static sal_Int16 nRet = 0;
522 if (currentChar != lastChar)
524 lastChar = currentChar;
526 if (!getCompatibilityScriptClassByBlock(currentChar, nRet))
527 nRet = getScriptClassByUAX24Script(currentChar);
530 return nRet;
533 static inline sal_Bool operator == (const Locale& l1, const Locale& l2) {
534 return l1.Language == l2.Language && l1.Country == l2.Country && l1.Variant == l2.Variant;
537 sal_Bool SAL_CALL BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString& aLocaleName) throw( RuntimeException )
539 // to share service between same Language but different Country code, like zh_CN and zh_TW
540 for (size_t l = 0; l < lookupTable.size(); l++) {
541 lookupTableItem *listItem = lookupTable[l];
542 if (aLocaleName == listItem->aLocale.Language) {
543 xBI = listItem->xBI;
544 return sal_True;
548 Reference < uno::XInterface > xI = m_xContext->getServiceManager()->createInstanceWithContext(
549 OUString("com.sun.star.i18n.BreakIterator_") + aLocaleName, m_xContext);
551 if ( xI.is() ) {
552 xI->queryInterface( getCppuType((const Reference< XBreakIterator>*)0) ) >>= xBI;
553 if (xBI.is()) {
554 lookupTable.push_back(new lookupTableItem(Locale(aLocaleName, aLocaleName, aLocaleName), xBI));
555 return sal_True;
558 return sal_False;
561 Reference < XBreakIterator > SAL_CALL
562 BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale& rLocale) throw (RuntimeException)
564 if (xBI.is() && rLocale == aLocale)
565 return xBI;
566 else if (m_xContext.is()) {
567 aLocale = rLocale;
569 for (size_t i = 0; i < lookupTable.size(); i++) {
570 lookupTableItem *listItem = lookupTable[i];
571 if (rLocale == listItem->aLocale)
572 return xBI = listItem->xBI;
575 sal_Unicode under = (sal_Unicode)'_';
577 sal_Int32 l = rLocale.Language.getLength();
578 sal_Int32 c = rLocale.Country.getLength();
579 sal_Int32 v = rLocale.Variant.getLength();
580 OUStringBuffer aBuf(l+c+v+3);
582 if ((l > 0 && c > 0 && v > 0 &&
583 // load service with name <base>_<lang>_<country>_<varian>
584 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
585 rLocale.Country).append(under).append(rLocale.Variant).makeStringAndClear())) ||
586 (l > 0 && c > 0 &&
587 // load service with name <base>_<lang>_<country>
588 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
589 rLocale.Country).makeStringAndClear())) ||
590 (l > 0 && c > 0 && rLocale.Language.compareToAscii("zh") == 0 &&
591 (rLocale.Country.compareToAscii("HK") == 0 ||
592 rLocale.Country.compareToAscii("MO") == 0) &&
593 // if the country code is HK or MO, one more step to try TW.
594 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).appendAscii(
595 "TW").makeStringAndClear())) ||
596 (l > 0 &&
597 // load service with name <base>_<lang>
598 createLocaleSpecificBreakIterator(rLocale.Language)) ||
599 // load default service with name <base>_Unicode
600 createLocaleSpecificBreakIterator(OUString("Unicode"))) {
601 lookupTable.push_back( new lookupTableItem(aLocale, xBI) );
602 return xBI;
605 throw RuntimeException();
// Name reported by getImplementationName() and getSupportedServiceNames() and matched by
// supportsService().
608 const sal_Char cBreakIterator[] = "com.sun.star.i18n.BreakIterator";
610 OUString SAL_CALL
611 BreakIteratorImpl::getImplementationName(void) throw( RuntimeException )
613 return OUString::createFromAscii(cBreakIterator);
616 sal_Bool SAL_CALL
617 BreakIteratorImpl::supportsService(const OUString& rServiceName) throw( RuntimeException )
619 return !rServiceName.compareToAscii(cBreakIterator);
622 Sequence< OUString > SAL_CALL
623 BreakIteratorImpl::getSupportedServiceNames(void) throw( RuntimeException )
625 Sequence< OUString > aRet(1);
626 aRet[0] = OUString::createFromAscii(cBreakIterator);
627 return aRet;
630 } } } }
632 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */