1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_i18npool.hxx"
31 #define BREAKITERATOR_ALL
32 #include <breakiterator_cjk.hxx>
33 #include <localedata.hxx>
34 #include <i18nutil/unicode.hxx>
36 using namespace ::com::sun::star::uno
;
37 using namespace ::com::sun::star::lang
;
38 using namespace ::rtl
;
40 namespace com
{ namespace sun
{ namespace star
{ namespace i18n
{
42 // ----------------------------------------------------
43 // class BreakIterator_CJK
44 // ----------------------------------------------------;
// Constructor of BreakIterator_CJK.
// The one statement visible here stores the string literal
// "com.sun.star.i18n.BreakIterator_CJK" in cBreakIterator.
// NOTE(review): the signature line ends with ':' as if a member-initializer
// list follows, but no initializers or braces are visible and the embedded
// numbering jumps from 46 to 50 - lines appear to be missing from this
// extract; confirm against the complete file.
46 BreakIterator_CJK::BreakIterator_CJK() :
50 cBreakIterator
= "com.sun.star.i18n.BreakIterator_CJK";
// Looks up the word that precedes anyPos in text.
//   text     - string to search
//   anyPos   - position the lookup is relative to
//   nLocale  - locale; only forwarded to the BreakIterator_Unicode calls
//   wordType - word type forwarded to every lookup
// Declared with throw(RuntimeException).
// The dictionary object (dict) is queried first; BreakIterator_Unicode is
// used to re-check the dictionary answer and as the final fallback.
// NOTE(review): the return type, the braces and the statements controlled by
// the two `if` conditions are not visible in this extract (the embedded
// numbering has gaps) - presumably each `if` returns `result`; confirm
// against the complete file.
54 BreakIterator_CJK::previousWord(const OUString
& text
, sal_Int32 anyPos
,
55 const lang::Locale
& nLocale
, sal_Int16 wordType
) throw(RuntimeException
)
// Dictionary-based lookup; the answer is kept in `result`.
58 result
= dict
->previousWord(text
, anyPos
, wordType
);
59 // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
// The condition is true when the span length (endPos - startPos) differs
// from 1, or when the script type at result.startPos is ASIAN.
60 if (result
.endPos
- result
.startPos
!= 1 ||
61 getScriptType(text
, result
.startPos
) == ScriptType::ASIAN
)
// Recompute the word boundary at result.startPos through
// BreakIterator_Unicode, with the last argument fixed to true.
63 result
= BreakIterator_Unicode::getWordBoundary(text
, result
.startPos
, nLocale
, wordType
, true);
// Tests whether the recomputed word ends before anyPos.
64 if (result
.endPos
< anyPos
)
// Final fallback: hand the original request to BreakIterator_Unicode.
67 return BreakIterator_Unicode::previousWord(text
, anyPos
, nLocale
, wordType
);
// Looks up the word that follows anyPos in text.
//   text     - string to search
//   anyPos   - position the lookup is relative to
//   nLocale  - locale; only forwarded to the BreakIterator_Unicode calls
//   wordType - word type forwarded to every lookup
// Declared with throw(RuntimeException).
// Mirrors previousWord: dictionary lookup first, then a re-check and a final
// fallback through BreakIterator_Unicode.
// NOTE(review): the return type, the braces and the statements controlled by
// the two `if` conditions are not visible in this extract (the embedded
// numbering has gaps) - presumably each `if` returns `result`; confirm
// against the complete file.
71 BreakIterator_CJK::nextWord(const OUString
& text
, sal_Int32 anyPos
,
72 const lang::Locale
& nLocale
, sal_Int16 wordType
) throw(RuntimeException
)
// Dictionary-based lookup; the answer is kept in `result`.
75 result
= dict
->nextWord(text
, anyPos
, wordType
);
76 // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
// The condition is true when the span length (endPos - startPos) differs
// from 1, or when the script type at result.startPos is ASIAN.
77 if (result
.endPos
- result
.startPos
!= 1 ||
78 getScriptType(text
, result
.startPos
) == ScriptType::ASIAN
)
// Recompute the word boundary at result.startPos through
// BreakIterator_Unicode, with the last argument fixed to true.
80 result
= BreakIterator_Unicode::getWordBoundary(text
, result
.startPos
, nLocale
, wordType
, true);
// Tests whether the recomputed word starts after anyPos.
81 if (result
.startPos
> anyPos
)
// Final fallback: hand the original request to BreakIterator_Unicode.
84 return BreakIterator_Unicode::nextWord(text
, anyPos
, nLocale
, wordType
);
// Determines the boundary of the word at anyPos in text.
//   text       - string to search
//   anyPos     - position inside the text to examine
//   nLocale    - locale; only forwarded to the BreakIterator_Unicode call
//   wordType   - word type forwarded to both lookups
//   bDirection - direction flag forwarded unchanged to both lookups
// Declared with throw(RuntimeException).
// NOTE(review): the return type, the braces and the statement controlled by
// the `if` condition are not visible in this extract (the embedded numbering
// has gaps) - presumably the `if` returns `result`; confirm against the
// complete file.
88 BreakIterator_CJK::getWordBoundary( const OUString
& text
, sal_Int32 anyPos
,
89 const lang::Locale
& nLocale
, sal_Int16 wordType
, sal_Bool bDirection
)
90 throw(RuntimeException
)
// Dictionary-based lookup; the answer is kept in `result`.
93 result
= dict
->getWordBoundary(text
, anyPos
, wordType
, bDirection
);
94 // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
// The condition is true when the span length (endPos - startPos) differs
// from 1, or when the script type at result.startPos is ASIAN.
95 if (result
.endPos
- result
.startPos
!= 1 ||
96 getScriptType(text
, result
.startPos
) == ScriptType::ASIAN
)
// Fallback: hand the original request to BreakIterator_Unicode.
99 return BreakIterator_Unicode::getWordBoundary(text
, anyPos
, nLocale
, wordType
, bDirection
);
// Computes a line-break position for Text, starting from nStartPos.
//   Text      - string being broken
//   nStartPos - candidate break position; taken by value and adjusted locally
//   bOptions  - user options: allowPunctuationOutsideMargin,
//               applyForbiddenRules, forbiddenBeginCharacters and
//               forbiddenEndCharacters are the fields read here
// The locale, minimum break position and hyphenation options parameters are
// unnamed (commented out) and therefore unused.
// Declared with throw(RuntimeException).
// The visible code stores the adjusted nStartPos in lbr.breakIndex and sets
// lbr.breakType to BreakType::WORDBOUNDARY.
// NOTE(review): the braces, the body of the first `if` branch and the return
// statement are not visible in this extract (the embedded numbering has
// gaps) - presumably `lbr` is returned; confirm against the complete file.
102 LineBreakResults SAL_CALL
BreakIterator_CJK::getLineBreak(
103 const OUString
& Text
, sal_Int32 nStartPos
,
104 const lang::Locale
& /*rLocale*/, sal_Int32
/*nMinBreakPos*/,
105 const LineBreakHyphenationOptions
& /*hOptions*/,
106 const LineBreakUserOptions
& bOptions
) throw(RuntimeException
)
// Result object filled in at the end of the function.
108 LineBreakResults lbr
;
// Hanging punctuation case: punctuation outside the margin is allowed, the
// character at nStartPos is one of hangingCharacters, and after
// iterateCodePoints(&nStartPos, 1) the position equals the text length.
// NOTE(review): Text[nStartPos] is read without a visible range check on
// nStartPos - confirm callers guarantee a valid index.
// NOTE(review): the comma expression changes nStartPos before the comparison,
// so when the comparison fails the `else if` below sees the advanced
// position - confirm this is intended.
110 if (bOptions
.allowPunctuationOutsideMargin
&&
111 hangingCharacters
.indexOf(Text
[nStartPos
]) != -1 &&
112 (Text
.iterateCodePoints( &nStartPos
, 1), nStartPos
== Text
.getLength())) {
// Forbidden-character rules: only applied when 0 < nStartPos < text length.
114 } else if (bOptions
.applyForbiddenRules
&& 0 < nStartPos
&& nStartPos
< Text
.getLength()) {
// Keep calling iterateCodePoints(&nStartPos, -1) while nStartPos > 0 and
// either the character at nStartPos is in forbiddenBeginCharacters or the
// character at nStartPos-1 is in forbiddenEndCharacters.
115 while (nStartPos
> 0 &&
116 (bOptions
.forbiddenBeginCharacters
.indexOf(Text
[nStartPos
]) != -1 ||
117 bOptions
.forbiddenEndCharacters
.indexOf(Text
[nStartPos
-1]) != -1))
118 Text
.iterateCodePoints( &nStartPos
, -1);
// Report the (possibly adjusted) position as the break index.
121 lbr
.breakIndex
= nStartPos
;
122 lbr
.breakType
= BreakType::WORDBOUNDARY
;
// Helper macro: expands to a lang::Locale constructed from two ASCII C
// strings (language, country), each converted with
// OUString::createFromAscii, plus an empty OUString as the third argument.
126 #define LOCALE(language, country) lang::Locale(OUString::createFromAscii(language), OUString::createFromAscii(country), OUString())
127 // ----------------------------------------------------
128 // class BreakIterator_zh
129 // ----------------------------------------------------;
// Constructor of BreakIterator_zh.
// Allocates an xdictionary for "zh" with `new` and stores it in dict, loads
// the hanging characters for locale ("zh", "CN") from LocaleData, and stores
// the string literal "com.sun.star.i18n.BreakIterator_zh" in cBreakIterator.
// NOTE(review): the braces are not visible in this extract (the embedded
// numbering has gaps).
130 BreakIterator_zh::BreakIterator_zh()
132 dict
= new xdictionary("zh");
133 hangingCharacters
= LocaleData().getHangingCharacters(LOCALE("zh", "CN"));
134 cBreakIterator
= "com.sun.star.i18n.BreakIterator_zh";
// Destructor of BreakIterator_zh.
// NOTE(review): only the signature is visible; the body is missing from this
// extract (the embedded numbering jumps from 137 to 142). Presumably it
// releases the xdictionary allocated in the constructor - confirm against
// the complete file.
137 BreakIterator_zh::~BreakIterator_zh()
142 // ----------------------------------------------------
143 // class BreakIterator_zh_TW
144 // ----------------------------------------------------;
// Constructor of BreakIterator_zh_TW.
// Allocates an xdictionary for "zh" with `new` and stores it in dict, loads
// the hanging characters for locale ("zh", "TW") from LocaleData, and stores
// the string literal "com.sun.star.i18n.BreakIterator_zh_TW" in
// cBreakIterator.
// NOTE(review): the braces are not visible in this extract (the embedded
// numbering has gaps).
145 BreakIterator_zh_TW::BreakIterator_zh_TW()
147 dict
= new xdictionary("zh");
148 hangingCharacters
= LocaleData().getHangingCharacters(LOCALE("zh", "TW"));
149 cBreakIterator
= "com.sun.star.i18n.BreakIterator_zh_TW";
// Destructor of BreakIterator_zh_TW.
// NOTE(review): only the signature is visible; the body is missing from this
// extract (the embedded numbering jumps from 152 to 157). Presumably it
// releases the xdictionary allocated in the constructor - confirm against
// the complete file.
152 BreakIterator_zh_TW::~BreakIterator_zh_TW()
157 // ----------------------------------------------------
158 // class BreakIterator_ja
159 // ----------------------------------------------------;
// Constructor of BreakIterator_ja.
// Allocates an xdictionary for "ja" with `new`, stores it in dict and calls
// setJapaneseWordBreak() on it; then loads the hanging characters for locale
// ("ja", "JP") from LocaleData and stores the string literal
// "com.sun.star.i18n.BreakIterator_ja" in cBreakIterator.
// NOTE(review): the braces are not visible in this extract (the embedded
// numbering has gaps).
160 BreakIterator_ja::BreakIterator_ja()
162 dict
= new xdictionary("ja");
163 dict
->setJapaneseWordBreak();
164 hangingCharacters
= LocaleData().getHangingCharacters(LOCALE("ja", "JP"));
165 cBreakIterator
= "com.sun.star.i18n.BreakIterator_ja";
// Destructor of BreakIterator_ja.
// NOTE(review): only the signature is visible; the body is missing from this
// extract (the embedded numbering jumps from 168 to 173). Presumably it
// releases the xdictionary allocated in the constructor - confirm against
// the complete file.
168 BreakIterator_ja::~BreakIterator_ja()
173 // ----------------------------------------------------
174 // class BreakIterator_ko
175 // ----------------------------------------------------;
// Constructor of BreakIterator_ko.
// Loads the hanging characters for locale ("ko", "KR") from LocaleData and
// stores the string literal "com.sun.star.i18n.BreakIterator_ko" in
// cBreakIterator. Unlike the zh/zh_TW/ja constructors, no assignment to dict
// is visible here.
// NOTE(review): the braces are not visible in this extract (the embedded
// numbering has gaps).
176 BreakIterator_ko::BreakIterator_ko()
178 hangingCharacters
= LocaleData().getHangingCharacters(LOCALE("ko", "KR"));
179 cBreakIterator
= "com.sun.star.i18n.BreakIterator_ko";
182 BreakIterator_ko::~BreakIterator_ko()