merged tag ooo/OOO330_m14
[LibreOffice.git] / i18npool / source / breakiterator / breakiterator_cjk.cxx
blob16d7d133753806d3b701ca7d5c3c1ee25ac1769d
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_i18npool.hxx"
31 #define BREAKITERATOR_ALL
32 #include <breakiterator_cjk.hxx>
33 #include <localedata.hxx>
34 #include <i18nutil/unicode.hxx>
36 using namespace ::com::sun::star::uno;
37 using namespace ::com::sun::star::lang;
38 using namespace ::rtl;
40 namespace com { namespace sun { namespace star { namespace i18n {
42 // ----------------------------------------------------
43 // class BreakIterator_CJK
44 // ----------------------------------------------------;
46 BreakIterator_CJK::BreakIterator_CJK() :
47 dict( NULL ),
48 hangingCharacters()
50 cBreakIterator = "com.sun.star.i18n.BreakIterator_CJK";
53 Boundary SAL_CALL
54 BreakIterator_CJK::previousWord(const OUString& text, sal_Int32 anyPos,
55 const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException)
57 if (dict) {
58 result = dict->previousWord(text, anyPos, wordType);
59 // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
60 if (result.endPos - result.startPos != 1 ||
61 getScriptType(text, result.startPos) == ScriptType::ASIAN)
62 return result;
63 result = BreakIterator_Unicode::getWordBoundary(text, result.startPos, nLocale, wordType, true);
64 if (result.endPos < anyPos)
65 return result;
67 return BreakIterator_Unicode::previousWord(text, anyPos, nLocale, wordType);
70 Boundary SAL_CALL
71 BreakIterator_CJK::nextWord(const OUString& text, sal_Int32 anyPos,
72 const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException)
74 if (dict) {
75 result = dict->nextWord(text, anyPos, wordType);
76 // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
77 if (result.endPos - result.startPos != 1 ||
78 getScriptType(text, result.startPos) == ScriptType::ASIAN)
79 return result;
80 result = BreakIterator_Unicode::getWordBoundary(text, result.startPos, nLocale, wordType, true);
81 if (result.startPos > anyPos)
82 return result;
84 return BreakIterator_Unicode::nextWord(text, anyPos, nLocale, wordType);
87 Boundary SAL_CALL
88 BreakIterator_CJK::getWordBoundary( const OUString& text, sal_Int32 anyPos,
89 const lang::Locale& nLocale, sal_Int16 wordType, sal_Bool bDirection )
90 throw(RuntimeException)
92 if (dict) {
93 result = dict->getWordBoundary(text, anyPos, wordType, bDirection);
94 // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
95 if (result.endPos - result.startPos != 1 ||
96 getScriptType(text, result.startPos) == ScriptType::ASIAN)
97 return result;
99 return BreakIterator_Unicode::getWordBoundary(text, anyPos, nLocale, wordType, bDirection);
102 LineBreakResults SAL_CALL BreakIterator_CJK::getLineBreak(
103 const OUString& Text, sal_Int32 nStartPos,
104 const lang::Locale& /*rLocale*/, sal_Int32 /*nMinBreakPos*/,
105 const LineBreakHyphenationOptions& /*hOptions*/,
106 const LineBreakUserOptions& bOptions ) throw(RuntimeException)
108 LineBreakResults lbr;
110 if (bOptions.allowPunctuationOutsideMargin &&
111 hangingCharacters.indexOf(Text[nStartPos]) != -1 &&
112 (Text.iterateCodePoints( &nStartPos, 1), nStartPos == Text.getLength())) {
113 ; // do nothing
114 } else if (bOptions.applyForbiddenRules && 0 < nStartPos && nStartPos < Text.getLength()) {
115 while (nStartPos > 0 &&
116 (bOptions.forbiddenBeginCharacters.indexOf(Text[nStartPos]) != -1 ||
117 bOptions.forbiddenEndCharacters.indexOf(Text[nStartPos-1]) != -1))
118 Text.iterateCodePoints( &nStartPos, -1);
121 lbr.breakIndex = nStartPos;
122 lbr.breakType = BreakType::WORDBOUNDARY;
123 return lbr;
// Builds a lang::Locale from ASCII language and country strings; the third
// Locale field is left as an empty OUString.
#define LOCALE(language, country) lang::Locale(OUString::createFromAscii(language), OUString::createFromAscii(country), OUString())
127 // ----------------------------------------------------
128 // class BreakIterator_zh
129 // ----------------------------------------------------;
130 BreakIterator_zh::BreakIterator_zh()
132 dict = new xdictionary("zh");
133 hangingCharacters = LocaleData().getHangingCharacters(LOCALE("zh", "CN"));
134 cBreakIterator = "com.sun.star.i18n.BreakIterator_zh";
137 BreakIterator_zh::~BreakIterator_zh()
139 delete dict;
142 // ----------------------------------------------------
143 // class BreakIterator_zh_TW
144 // ----------------------------------------------------;
145 BreakIterator_zh_TW::BreakIterator_zh_TW()
147 dict = new xdictionary("zh");
148 hangingCharacters = LocaleData().getHangingCharacters(LOCALE("zh", "TW"));
149 cBreakIterator = "com.sun.star.i18n.BreakIterator_zh_TW";
152 BreakIterator_zh_TW::~BreakIterator_zh_TW()
154 delete dict;
157 // ----------------------------------------------------
158 // class BreakIterator_ja
159 // ----------------------------------------------------;
160 BreakIterator_ja::BreakIterator_ja()
162 dict = new xdictionary("ja");
163 dict->setJapaneseWordBreak();
164 hangingCharacters = LocaleData().getHangingCharacters(LOCALE("ja", "JP"));
165 cBreakIterator = "com.sun.star.i18n.BreakIterator_ja";
168 BreakIterator_ja::~BreakIterator_ja()
170 delete dict;
173 // ----------------------------------------------------
174 // class BreakIterator_ko
175 // ----------------------------------------------------;
176 BreakIterator_ko::BreakIterator_ko()
178 hangingCharacters = LocaleData().getHangingCharacters(LOCALE("ko", "KR"));
179 cBreakIterator = "com.sun.star.i18n.BreakIterator_ko";
182 BreakIterator_ko::~BreakIterator_ko()
186 } } } }