update dev300-m58
[ooovba.git] / i18npool / source / textconversion / textconversion_zh.cxx
blob297906d878489c3b202575018b851b04a34a3438
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: textconversion_zh.cxx,v $
10 * $Revision: 1.11.22.1 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_i18npool.hxx"

#include <assert.h>
#include <vector>
#include <textconversion.hxx>
#include <com/sun/star/i18n/TextConversionType.hpp>
#include <com/sun/star/i18n/TextConversionOption.hpp>
#include <com/sun/star/linguistic2/ConversionDirection.hpp>
#include <com/sun/star/linguistic2/ConversionDictionaryType.hpp>
#include <i18nutil/x_rtl_ustring.h>
42 using namespace com::sun::star::lang;
43 using namespace com::sun::star::i18n;
44 using namespace com::sun::star::linguistic2;
45 using namespace com::sun::star::uno;
46 using namespace rtl;
48 namespace com { namespace sun { namespace star { namespace i18n {
50 TextConversion_zh::TextConversion_zh( const Reference < XMultiServiceFactory >& xMSF )
52 Reference < XInterface > xI;
53 xI = xMSF->createInstance(
54 OUString::createFromAscii( "com.sun.star.linguistic2.ConversionDictionaryList" ));
55 if ( xI.is() )
56 xI->queryInterface( getCppuType((const Reference< XConversionDictionaryList>*)0) ) >>= xCDL;
58 implementationName = "com.sun.star.i18n.TextConversion_zh";
61 sal_Unicode SAL_CALL getOneCharConversion(sal_Unicode ch, const sal_Unicode* Data, const sal_uInt16* Index)
63 if (Data && Index) {
64 sal_Unicode address = Index[ch>>8];
65 if (address != 0xFFFF)
66 address = Data[address + (ch & 0xFF)];
67 return (address != 0xFFFF) ? address : ch;
68 } else {
69 return ch;
73 OUString SAL_CALL
74 TextConversion_zh::getCharConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, sal_Bool toSChinese, sal_Int32 nConversionOptions)
76 const sal_Unicode *Data;
77 const sal_uInt16 *Index;
79 if (toSChinese) {
80 Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_T2S"))();
81 Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_T2S"))();
82 } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
83 Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_S2V"))();
84 Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_S2V"))();
85 } else {
86 Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_S2T"))();
87 Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_S2T"))();
90 rtl_uString * newStr = x_rtl_uString_new_WithLength( nLength ); // defined in x_rtl_ustring.h
91 for (sal_Int32 i = 0; i < nLength; i++)
92 newStr->buffer[i] =
93 getOneCharConversion(aText[nStartPos+i], Data, Index);
94 return OUString( newStr->buffer, nLength);
97 OUString SAL_CALL
98 TextConversion_zh::getWordConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, sal_Bool toSChinese, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
100 sal_Int32 dictLen = 0;
101 sal_Int32 maxLen = 0;
102 const sal_uInt16 *index;
103 const sal_uInt16 *entry;
104 const sal_Unicode *charData;
105 const sal_uInt16 *charIndex;
106 sal_Bool one2one=sal_True;
108 const sal_Unicode *wordData = ((const sal_Unicode* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordData"))(dictLen);
109 if (toSChinese) {
110 index = ((const sal_uInt16* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordIndex_T2S"))(maxLen);
111 entry = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_WordEntry_T2S"))();
112 charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_T2S"))();
113 charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_T2S"))();
114 } else {
115 index = ((const sal_uInt16* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordIndex_S2T"))(maxLen);
116 entry = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_WordEntry_S2T"))();
117 if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
118 charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_S2V"))();
119 charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_S2V"))();
120 } else {
121 charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_S2T"))();
122 charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_S2T"))();
126 if ((!wordData || !index || !entry) && !xCDL.is()) // no word mapping defined, do char2char conversion.
127 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
129 rtl_uString * newStr = x_rtl_uString_new_WithLength( nLength * 2 ); // defined in x_rtl_ustring.h
130 sal_Int32 currPos = 0, count = 0;
131 while (currPos < nLength) {
132 sal_Int32 len = nLength - currPos;
133 sal_Bool found = sal_False;
134 if (len > maxLen)
135 len = maxLen;
136 for (; len > 0 && ! found; len--) {
137 OUString word = aText.copy(nStartPos + currPos, len);
138 sal_Int32 current = 0;
139 // user dictionary
140 if (xCDL.is()) {
141 Sequence < OUString > conversions;
142 try {
143 conversions = xCDL->queryConversions(word, 0, len,
144 aLocale, ConversionDictionaryType::SCHINESE_TCHINESE,
145 /*toSChinese ?*/ ConversionDirection_FROM_LEFT /*: ConversionDirection_FROM_RIGHT*/,
146 nConversionOptions);
148 catch ( NoSupportException & ) {
149 // clear reference (when there is no user dictionary) in order
150 // to not always have to catch this exception again
151 // in further calls. (save time)
152 xCDL = 0;
154 catch (...) {
155 // catch all other exceptions to allow
156 // querying the system dictionary in the next line
158 if (conversions.getLength() > 0) {
159 if (offset.getLength() > 0) {
160 if (word.getLength() != conversions[0].getLength())
161 one2one=sal_False;
162 while (current < conversions[0].getLength()) {
163 offset[count] = nStartPos + currPos + (current *
164 word.getLength() / conversions[0].getLength());
165 newStr->buffer[count++] = conversions[0][current++];
167 // offset[count-1] = nStartPos + currPos + word.getLength() - 1;
168 } else {
169 while (current < conversions[0].getLength())
170 newStr->buffer[count++] = conversions[0][current++];
172 currPos += word.getLength();
173 found = sal_True;
177 if (!found && index[len+1] - index[len] > 0) {
178 sal_Int32 bottom = (sal_Int32) index[len];
179 sal_Int32 top = (sal_Int32) index[len+1] - 1;
181 while (bottom <= top && !found) {
182 current = (top + bottom) / 2;
183 const sal_Int32 result = word.compareTo(wordData + entry[current]);
184 if (result < 0)
185 top = current - 1;
186 else if (result > 0)
187 bottom = current + 1;
188 else {
189 if (toSChinese) // Traditionary/Simplified conversion,
190 for (current = entry[current]-1; current > 0 && wordData[current-1]; current--) ;
191 else // Simplified/Traditionary conversion, forwards search for next word
192 current = entry[current] + word.getLength() + 1;
193 sal_Int32 start=current;
194 if (offset.getLength() > 0) {
195 if (word.getLength() != OUString(&wordData[current]).getLength())
196 one2one=sal_False;
197 sal_Int32 convertedLength=OUString(&wordData[current]).getLength();
198 while (wordData[current]) {
199 offset[count]=nStartPos + currPos + ((current-start) *
200 word.getLength() / convertedLength);
201 newStr->buffer[count++] = wordData[current++];
203 // offset[count-1]=nStartPos + currPos + word.getLength() - 1;
204 } else {
205 while (wordData[current])
206 newStr->buffer[count++] = wordData[current++];
208 currPos += word.getLength();
209 found = sal_True;
214 if (!found) {
215 if (offset.getLength() > 0)
216 offset[count]=nStartPos+currPos;
217 newStr->buffer[count++] =
218 getOneCharConversion(aText[nStartPos+currPos], charData, charIndex);
219 currPos++;
222 if (offset.getLength() > 0)
223 offset.realloc(one2one ? 0 : count);
224 return OUString( newStr->buffer, count);
227 TextConversionResult SAL_CALL
228 TextConversion_zh::getConversions( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
229 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
230 throw( RuntimeException, IllegalArgumentException, NoSupportException )
232 TextConversionResult result;
234 result.Candidates.realloc(1);
235 result.Candidates[0] = getConversion( aText, nStartPos, nLength, rLocale, nConversionType, nConversionOptions);
236 result.Boundary.startPos = nStartPos;
237 result.Boundary.endPos = nStartPos + nLength;
239 return result;
242 OUString SAL_CALL
243 TextConversion_zh::getConversion( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
244 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
245 throw( RuntimeException, IllegalArgumentException, NoSupportException )
247 if (rLocale.Language.equalsAscii("zh") &&
248 ( nConversionType == TextConversionType::TO_SCHINESE ||
249 nConversionType == TextConversionType::TO_TCHINESE) ) {
251 aLocale=rLocale;
252 sal_Bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
254 if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER)
255 // char to char dictionary
256 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
257 else {
258 Sequence <sal_Int32> offset;
259 // word to word dictionary
260 return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
262 } else
263 throw NoSupportException(); // Conversion type is not supported in this service.
266 OUString SAL_CALL
267 TextConversion_zh::getConversionWithOffset( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
268 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
269 throw( RuntimeException, IllegalArgumentException, NoSupportException )
271 if (rLocale.Language.equalsAscii("zh") &&
272 ( nConversionType == TextConversionType::TO_SCHINESE ||
273 nConversionType == TextConversionType::TO_TCHINESE) ) {
275 aLocale=rLocale;
276 sal_Bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
278 if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER) {
279 offset.realloc(0);
280 // char to char dictionary
281 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
282 } else {
283 if (offset.getLength() < 2*nLength)
284 offset.realloc(2*nLength);
285 // word to word dictionary
286 return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
288 } else
289 throw NoSupportException(); // Conversion type is not supported in this service.
292 sal_Bool SAL_CALL
293 TextConversion_zh::interactiveConversion( const Locale& /*rLocale*/, sal_Int16 /*nTextConversionType*/, sal_Int32 /*nTextConversionOptions*/ )
294 throw( RuntimeException, IllegalArgumentException, NoSupportException )
296 return sal_False;
299 } } } }