1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: breakiterator_cjk.cxx,v $
10 * $Revision: 1.17.16.1 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_i18npool.hxx"
34 #define BREAKITERATOR_ALL
35 #include <breakiterator_cjk.hxx>
36 #include <localedata.hxx>
37 #include <i18nutil/unicode.hxx>
39 using namespace ::com::sun::star::uno
;
40 using namespace ::com::sun::star::lang
;
41 using namespace ::rtl
;
43 namespace com
{ namespace sun
{ namespace star
{ namespace i18n
{
45 // ----------------------------------------------------
46 // class BreakIterator_CJK
47 // ----------------------------------------------------;
// Default constructor of the shared CJK break-iterator class.
// The only statement visible here stores the identifier string
// "com.sun.star.i18n.BreakIterator_CJK" in cBreakIterator.
// NOTE(review): the member-initializer list announced by the trailing ':'
// (embedded source lines 50-52) is not part of this excerpt -- presumably it
// initialises dict and hangingCharacters; confirm against the full file.
49 BreakIterator_CJK::BreakIterator_CJK() :
53 cBreakIterator
= "com.sun.star.i18n.BreakIterator_CJK";
// Finds the word that precedes anyPos in text.
//
// Parameters:
//   text     - string being scanned
//   anyPos   - position the search starts from
//   nLocale  - locale forwarded to the BreakIterator_Unicode fallbacks
//   wordType - word-type selector forwarded unchanged to every lookup
// Declared to throw RuntimeException only.
//
// Strategy visible in the code: ask the dictionary (dict) first, re-check
// the answer with BreakIterator_Unicode::getWordBoundary, and finally fall
// back to BreakIterator_Unicode::previousWord.
//
// NOTE(review): the return-type line, the braces and the statements guarded
// by the two `if`s (embedded source lines 56, 59-60, 65, 68-69) are not part
// of this excerpt. `result` is not declared in the visible lines --
// presumably an inherited member; confirm against the full file.
57 BreakIterator_CJK::previousWord(const OUString
& text
, sal_Int32 anyPos
,
58 const lang::Locale
& nLocale
, sal_Int16 wordType
) throw(RuntimeException
)
// Dictionary-based lookup.
61 result
= dict
->previousWord(text
, anyPos
, wordType
);
62 // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
// Condition is true when the dictionary result does not span exactly one
// position, or when the character at result.startPos is of ASIAN script
// type -- i.e. everything except a single non-Asian character.
// NOTE(review): the statement this condition guards is missing from the
// excerpt (presumably `return result;`).
63 if (result
.endPos
- result
.startPos
!= 1 ||
64 getScriptType(text
, result
.startPos
) == ScriptType::ASIAN
)
// Re-evaluate the boundary around result.startPos with the Unicode (ICU)
// iterator; the last argument (direction) is fixed to true.
66 result
= BreakIterator_Unicode::getWordBoundary(text
, result
.startPos
, nLocale
, wordType
, true);
// Tests whether the re-evaluated word ends before anyPos.
// NOTE(review): the guarded statement is missing from the excerpt.
67 if (result
.endPos
< anyPos
)
// Final fallback: delegate entirely to the Unicode iterator.
70 return BreakIterator_Unicode::previousWord(text
, anyPos
, nLocale
, wordType
);
// Finds the word that follows anyPos in text.
//
// Parameters:
//   text     - string being scanned
//   anyPos   - position the search starts from
//   nLocale  - locale forwarded to the BreakIterator_Unicode fallbacks
//   wordType - word-type selector forwarded unchanged to every lookup
// Declared to throw RuntimeException only.
//
// Mirrors previousWord: dictionary lookup first, re-check through
// BreakIterator_Unicode::getWordBoundary, then fall back to
// BreakIterator_Unicode::nextWord.
//
// NOTE(review): the return-type line, the braces and the statements guarded
// by the two `if`s (embedded source lines 73, 76-77, 82, 85-86) are not part
// of this excerpt. `result` is not declared in the visible lines --
// presumably an inherited member; confirm against the full file.
74 BreakIterator_CJK::nextWord(const OUString
& text
, sal_Int32 anyPos
,
75 const lang::Locale
& nLocale
, sal_Int16 wordType
) throw(RuntimeException
)
// Dictionary-based lookup.
78 result
= dict
->nextWord(text
, anyPos
, wordType
);
79 // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
// Condition is true for everything except a single non-Asian character:
// the result does not span exactly one position, or the character at
// result.startPos is of ASIAN script type.
// NOTE(review): the statement this condition guards is missing from the
// excerpt (presumably `return result;`).
80 if (result
.endPos
- result
.startPos
!= 1 ||
81 getScriptType(text
, result
.startPos
) == ScriptType::ASIAN
)
// Re-evaluate the boundary around result.startPos with the Unicode (ICU)
// iterator; the last argument (direction) is fixed to true.
83 result
= BreakIterator_Unicode::getWordBoundary(text
, result
.startPos
, nLocale
, wordType
, true);
// Tests whether the re-evaluated word starts after anyPos.
// NOTE(review): the guarded statement is missing from the excerpt.
84 if (result
.startPos
> anyPos
)
// Final fallback: delegate entirely to the Unicode iterator.
87 return BreakIterator_Unicode::nextWord(text
, anyPos
, nLocale
, wordType
);
// Determines the boundary of the word at anyPos.
//
// Parameters:
//   text       - string being scanned
//   anyPos     - position inside the text to examine
//   nLocale    - locale forwarded to the BreakIterator_Unicode fallback
//   wordType   - word-type selector forwarded unchanged
//   bDirection - direction flag forwarded unchanged to both lookups
// Declared to throw RuntimeException only.
//
// The dictionary is consulted first; the visible fallback delegates to
// BreakIterator_Unicode::getWordBoundary with the original anyPos.
//
// NOTE(review): the return-type line, the braces and the statement guarded
// by the `if` (embedded source lines 90, 94-95, 100-101) are not part of this
// excerpt. `result` is not declared in the visible lines -- presumably an
// inherited member; confirm against the full file.
91 BreakIterator_CJK::getWordBoundary( const OUString
& text
, sal_Int32 anyPos
,
92 const lang::Locale
& nLocale
, sal_Int16 wordType
, sal_Bool bDirection
)
93 throw(RuntimeException
)
// Dictionary-based lookup.
96 result
= dict
->getWordBoundary(text
, anyPos
, wordType
, bDirection
);
97 // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
// Condition is true for everything except a single non-Asian character:
// the result does not span exactly one position, or the character at
// result.startPos is of ASIAN script type.
// NOTE(review): the statement this condition guards is missing from the
// excerpt (presumably `return result;`).
98 if (result
.endPos
- result
.startPos
!= 1 ||
99 getScriptType(text
, result
.startPos
) == ScriptType::ASIAN
)
// Fallback: delegate to the Unicode (ICU) iterator.
102 return BreakIterator_Unicode::getWordBoundary(text
, anyPos
, nLocale
, wordType
, bDirection
);
// Computes a line-break position for CJK text, starting from nStartPos.
//
// Parameters:
//   Text      - text of the line being broken
//   nStartPos - candidate break position; taken by value and adjusted
//               locally
//   (locale, minimum break position and hyphenation options are unnamed
//    and therefore unused by this implementation)
//   bOptions  - user options; this function reads
//               allowPunctuationOutsideMargin, applyForbiddenRules,
//               forbiddenBeginCharacters and forbiddenEndCharacters
// Declared to throw RuntimeException only.
//
// Result: lbr.breakIndex is the (possibly adjusted) nStartPos and
// lbr.breakType is always BreakType::WORDBOUNDARY.
//
// NOTE(review): the opening brace, the body of the first branch, the
// closing braces and the final return (embedded source lines 110, 116,
// 122, 126-127) are not part of this excerpt.
105 LineBreakResults SAL_CALL
BreakIterator_CJK::getLineBreak(
106 const OUString
& Text
, sal_Int32 nStartPos
,
107 const lang::Locale
& /*rLocale*/, sal_Int32
/*nMinBreakPos*/,
108 const LineBreakHyphenationOptions
& /*hOptions*/,
109 const LineBreakUserOptions
& bOptions
) throw(RuntimeException
)
111 LineBreakResults lbr
;
// Hanging punctuation: taken when punctuation may stick out past the
// margin, the character at nStartPos is listed in hangingCharacters, and
// -- after stepping nStartPos forward by one code point -- the end of the
// text has been reached.
// NOTE(review): Text[nStartPos] is read without a visible range check on
// nStartPos; confirm callers never pass an out-of-range index.
// NOTE(review): the comma expression advances nStartPos as a side effect;
// when the end-of-text comparison then fails, the `else if` below runs
// with the advanced position. Confirm this is intended.
113 if (bOptions
.allowPunctuationOutsideMargin
&&
114 hangingCharacters
.indexOf(Text
[nStartPos
]) != -1 &&
115 (Text
.iterateCodePoints( &nStartPos
, 1), nStartPos
== Text
.getLength())) {
// Forbidden-character rules: only applied when nStartPos lies strictly
// inside the text.
117 } else if (bOptions
.applyForbiddenRules
&& 0 < nStartPos
&& nStartPos
< Text
.getLength()) {
// Step back one code point at a time while the character at nStartPos is
// in forbiddenBeginCharacters or the character just before it is in
// forbiddenEndCharacters; stops at position 0.
118 while (nStartPos
> 0 &&
119 (bOptions
.forbiddenBeginCharacters
.indexOf(Text
[nStartPos
]) != -1 ||
120 bOptions
.forbiddenEndCharacters
.indexOf(Text
[nStartPos
-1]) != -1))
121 Text
.iterateCodePoints( &nStartPos
, -1);
// Publish the adjusted position; the break type is unconditional.
124 lbr
.breakIndex
= nStartPos
;
125 lbr
.breakType
= BreakType::WORDBOUNDARY
;
// LOCALE(language, country): builds a lang::Locale from two ASCII C strings
// (each converted with OUString::createFromAscii) and an empty OUString as
// the third constructor argument. Used below to look up the hanging
// characters for each concrete iterator.
129 #define LOCALE(language, country) lang::Locale(OUString::createFromAscii(language), OUString::createFromAscii(country), OUString())
130 // ----------------------------------------------------
131 // class BreakIterator_zh
132 // ----------------------------------------------------;
// Constructor for the BreakIterator_zh variant:
//  - allocates an xdictionary for "zh" with `new` and stores it in dict,
//  - loads hangingCharacters from LocaleData for LOCALE("zh", "CN"),
//  - sets cBreakIterator to "com.sun.star.i18n.BreakIterator_zh".
// NOTE(review): the braces around the body are not part of this excerpt.
133 BreakIterator_zh::BreakIterator_zh()
135 dict
= new xdictionary("zh");
136 hangingCharacters
= LocaleData().getHangingCharacters(LOCALE("zh", "CN"));
137 cBreakIterator
= "com.sun.star.i18n.BreakIterator_zh";
// Destructor of BreakIterator_zh.
// NOTE(review): the body is not part of this excerpt; the constructor
// allocates dict with `new`, so confirm it is released here.
140 BreakIterator_zh::~BreakIterator_zh()
145 // ----------------------------------------------------
146 // class BreakIterator_zh_TW
147 // ----------------------------------------------------;
// Constructor for the BreakIterator_zh_TW variant:
//  - allocates an xdictionary for "zh" (same dictionary name as
//    BreakIterator_zh) with `new` and stores it in dict,
//  - loads hangingCharacters from LocaleData for LOCALE("zh", "TW"),
//  - sets cBreakIterator to "com.sun.star.i18n.BreakIterator_zh_TW".
// NOTE(review): the braces around the body are not part of this excerpt.
148 BreakIterator_zh_TW::BreakIterator_zh_TW()
150 dict
= new xdictionary("zh");
151 hangingCharacters
= LocaleData().getHangingCharacters(LOCALE("zh", "TW"));
152 cBreakIterator
= "com.sun.star.i18n.BreakIterator_zh_TW";
// Destructor of BreakIterator_zh_TW.
// NOTE(review): the body is not part of this excerpt; the constructor
// allocates dict with `new`, so confirm it is released here.
155 BreakIterator_zh_TW::~BreakIterator_zh_TW()
160 // ----------------------------------------------------
161 // class BreakIterator_ja
162 // ----------------------------------------------------;
// Constructor for the BreakIterator_ja variant:
//  - allocates an xdictionary for "ja" with `new` and stores it in dict,
//  - calls setJapaneseWordBreak() on that dictionary,
//  - loads hangingCharacters from LocaleData for LOCALE("ja", "JP"),
//  - sets cBreakIterator to "com.sun.star.i18n.BreakIterator_ja".
// NOTE(review): the braces around the body are not part of this excerpt.
163 BreakIterator_ja::BreakIterator_ja()
165 dict
= new xdictionary("ja");
166 dict
->setJapaneseWordBreak();
167 hangingCharacters
= LocaleData().getHangingCharacters(LOCALE("ja", "JP"));
168 cBreakIterator
= "com.sun.star.i18n.BreakIterator_ja";
// Destructor of BreakIterator_ja.
// NOTE(review): the body is not part of this excerpt; the constructor
// allocates dict with `new`, so confirm it is released here.
171 BreakIterator_ja::~BreakIterator_ja()
176 // ----------------------------------------------------
177 // class BreakIterator_ko
178 // ----------------------------------------------------;
// Constructor for the BreakIterator_ko variant:
//  - loads hangingCharacters from LocaleData for LOCALE("ko", "KR"),
//  - sets cBreakIterator to "com.sun.star.i18n.BreakIterator_ko".
// Unlike the zh/zh_TW/ja constructors, no dictionary is assigned to dict in
// the visible lines.
// NOTE(review): the braces around the body are not part of this excerpt;
// confirm how the word-break methods behave when dict was never assigned.
179 BreakIterator_ko::BreakIterator_ko()
181 hangingCharacters
= LocaleData().getHangingCharacters(LOCALE("ko", "KR"));
182 cBreakIterator
= "com.sun.star.i18n.BreakIterator_ko";
// Destructor of BreakIterator_ko.
// NOTE(review): the body is not part of this excerpt.
185 BreakIterator_ko::~BreakIterator_ko()